blob: 898e5bbda2c9f47d17394a0a51f3834960d67cb4 [file] [log] [blame]
#!/usr/bin/perl
    eval 'exec perl -S $0 "$@"'
	if $running_under_some_shell;
# The two lines above are the perlrun idiom for systems without #! support:
# a shell that is handed this file executes the eval line and re-launches
# the script under perl.  Perl itself never performs the exec, because the
# guard variable is never set anywhere in this script.
4#
5# txt2html.pl
6# Convert raw text to something with a little HTML formatting
7#
8# Written by Seth Golub <seth@cs.wustl.edu>
9# http://www.cs.wustl.edu/~seth/txt2html/
10#
11# $Revision: 1.15 $
12# $Date: 2004/10/05 20:30:33 $
13# $Author: cristy $
14#
15#
16# $Log: txt2html,v $
17# Revision 1.15 2004/10/05 20:30:33 cristy
18# *** empty log message ***
19#
20# Revision 1.14 2004/04/26 19:53:42 cristy
21# *** empty log message ***
22#
23# Revision 1.13 2004/04/26 15:03:00 cristy
24# *** empty log message ***
25#
26# Revision 1.12 2004/04/24 13:48:50 cristy
27# *** empty log message ***
28#
29# Revision 1.11 2003/10/28 18:40:59 cristy
30# *** empty log message ***
31#
32# Revision 1.10 2003/10/28 03:44:38 cristy
33# *** empty log message ***
34#
35# Revision 1.9 2003/10/17 13:59:12 cristy
36# *** empty log message ***
37#
38# Revision 1.8 2003/10/16 22:26:06 cristy
39# *** empty log message ***
40#
41# Revision 1.7 2003/10/12 04:10:15 cristy
42# *** empty log message ***
43#
44# Revision 1.6 2003/07/20 03:39:50 cristy
45# *** empty log message ***
46#
47# Revision 1.5 2003/07/19 19:44:20 cristy
48# *** empty log message ***
49#
50# Revision 1.4 2003/04/07 23:35:40 cristy
51# *** empty log message ***
52#
53# Revision 1.3 2003/04/05 02:52:42 cristy
54# *** empty log message ***
55#
56# Revision 1.2 2003/04/04 20:50:50 cristy
57# *** empty log message ***
58#
59# Revision 1.1 2003/03/25 15:10:23 cristy
60# genesis
61#
62# Revision 1.1 2003/03/22 17:02:00 cristy
63# *** empty log message ***
64#
65# Revision 1.10 1994/12/28 20:10:25 seth
66# * Added --extract, etc.
67#
68# Revision 1.9 94/12/13 15:16:23 15:16:23 seth (Seth Golub)
69# * Changed from #!/usr/local/bin/perl to the more clever version in
70# the man page. (How did I manage not to read this for so long?)
71# * Swapped hrule & header back to handle double lines. Why should
72# this order screw up headers?
73#
74# Revision 1.8 1994/11/30 21:07:03 seth
75# * put mail_anchor back in. (Why did I take this out?)
76# * Finally added handling of lettered lists (ordered lists marked with
77# letters)
78# * Added title option (--title, -t)
79# * Shortline now looks at how long the line was before txt2html
80# started adding tags. ($line_length)
81# * Changed list references to scalars where appropriate. (@foo[0] -> $foo[0])
82# * Added untabify() to homogenize leading indentation for list
83# prefixes and functions that use line length
84# * Added "underline tolerance" for when underlines are not exactly the
85# same length as what they underline.
86# * Added error message for unrecognized options
87# * removed \w matching on --capstag
88# * Tagline now removes leading & trailing whitespace before tagging
89# * swapped order of caps & heading in main loop
90# * Cleaned up code for speed and to get rid of warnings
91# * Added more restrictions to something being a mail header
92# * Added indentation for lists, just to make the output more readable.
93# * Fixed major bug in lists: $OL and $UL were never set, so when a
94# list was ended "</UL>" was *always* used!
95# * swapped order of hrule & header to properly handle long underlines
96#
97# Revision 1.7 94/10/28 13:16:11 13:16:11 seth (Seth Golub)
98# * Added to comments in options section
99# * renamed blank to is_blank
100# * Page break is converted to horizontal rule <HR>
101# * moved usage subroutine up top so people who look through code see
102# it sooner
103#
104# Revision 1.6 94/10/28 12:43:46 12:43:46 seth (Seth Golub)
105# * Creates anchors at each heading
106#
107# Revision 1.5 94/07/14 17:43:59 17:43:59 seth (Seth Golub)
108# * Fixed minor bug in Headers
109# * Preformatting can be set to only start/stop when TWO lines of
110# [non]formatted-looking-text are encountered. Old behavior is still
111# possible through command line options (-pb 1 -pe 1).
112# * Can preformat entire document (-pb 0) or disable preformatting
113# completely (-pe 0).
114# * Fixed minor bug in CAPS handling (paragraph breaks broke)
115# * Puts paragraph tags *before* paragraphs, not just between them.
116#
117# Revision 1.4 94/06/20 16:42:55 16:42:55 seth (Seth Golub)
118# * Allow ':' for numbered lists (e.g. "1: Figs")
119# * Whitespace at end of line will not start or end preformatting
120# * Mailmode is now off by default
121# * Doesn't break short lines if they are the first line in a list
122# item. It *should* break them anyway if the next line is a
123# continuation of the list item, but I haven't dealt with this yet.
# * Added action on lines that are all capital letters. You can change
# how these lines get tagged, as well as the minimum number of
# consecutive capital letters required to fire off this action.
127#
128# Revision 1.3 94/05/17 15:58:58 15:58:58 seth (Seth Golub)
129# * Tiny bugfix in unhyphenation
130#
131# Revision 1.2 94/05/16 18:15:16 18:15:16 seth (Seth Golub)
132# * Added unhyphenation
133#
134# Revision 1.1 94/05/16 16:19:03 16:19:03 seth (Seth Golub)
135# Initial revision
136#
137#
138# 1.02 Allow '-' in mail headers
139# Added handling for multiline mail headers
140#
141#
142#
143# Oscar Nierstrasz has a nice script for hypertextifying URLs.
144# It is available at:
145# http://cui_www.unige.ch/ftp/PUBLIC/oscar/scripts/html.pl
146#
147
148#########################
149# Configurable options
150#
151
# Each setting below is a package global.  The bracketed line above a
# setting names the command-line option(s) that deal_with_options() maps
# onto it.

# [-s <n>     ] | [--shortline <n>                 ]
$short_line_length = 40;        # Lines shorter than this are assumed to be
                                # intentionally broken and are kept
                                # that short. <BR>

# [-p <n>     ] | [--prewhite <n>                  ]
$preformat_whitespace_min = 5;  # Minimum number of consecutive leading
                                # whitespace characters to trigger
                                # preformatting.
                                # NOTE: Tabs are now expanded to
                                # spaces before this check is made.
                                # That means if $tab_width is 8 and
                                # this is 5, then one tab is expanded
                                # to 8 spaces, which is enough to
                                # trigger preformatting.

# [-pb <n>    ] | [--prebegin <n>                  ]
$preformat_trigger_lines = 2;   # How many lines of preformatted-looking
                                # text are needed to switch to <PRE>
                                #   <= 0 : Preformat entire document
                                #      1 : one line triggers
                                #   >= 2 : two lines trigger

# [-pe <n>    ] | [--preend <n>                    ]
$endpreformat_trigger_lines = 2;  # How many lines of unpreformatted-looking
                                # text are needed to switch from <PRE>
                                #   <= 0 : Never preformat within document
                                #      1 : one line triggers
                                #   >= 2 : two lines trigger
# NOTE for --prebegin and --preend:
# A zero takes precedence. If one is zero, the other is ignored.
# If both are zero, entire document is preformatted.


# [-r <n>     ] | [--hrule <n>                     ]
$hrule_min = 4;                 # Min number of ---s for an HRule.

# [-c <n>     ] | [--caps <n>                      ]
$min_caps_length = 3;           # min sequential CAPS for an all-caps line

# [-ct <tag>  ] | [--capstag <tag>                 ]
$caps_tag = "STRONG";           # Tag to put around all-caps lines

# [-m/+m      ] | [--mail / --nomail               ]
$mailmode = 0;                  # Deal with mail headers & quoted text

# [-u/+u      ] | [--unhyphen / --nounhyphen       ]
$unhyphenation = 1;             # Enables unhyphenation of text.

# [-a <file>  ] | [--append <file>                 ]
# [+a         ] | [--noappend                      ]
$append_file = 0;               # If you want something appended by
                                # default, put the filename here.
                                # The appended text will not be
                                # processed at all, so make sure it's
                                # plain text or decent HTML. i.e. do
                                # not have things like:
                                #   Seth Golub <seth@cs.wustl.edu>
                                # but instead, have:
                                #   Seth Golub &lt;seth@cs.wustl.edu&gt;

# [-t <title> ] | [--title <title>                 ]
$title = 0;                     # You can specify a title.
                                # Otherwise it won't put one in.

# [-ul <n>    ] | [--underlinelong <n>             ]
$underline_tolerance_long = 1;  # How much longer can underlines
                                # be and still be underlines?

# [-us <n>    ] | [--underlineshort <n>            ]
$underline_tolerance_short = 1; # How much shorter can underlines
                                # be and still be underlines?

# [-tw <n>    ] | [--tabwidth <n>                  ]
$tab_width = 8;                 # How many spaces equal a tab?


# [-iw <n>    ] | [--indentwidth <n>               ]
$indent_width = 2;              # Indents this many spaces for each
                                # level of a list

# [-e/+e      ] | [--extract / --noextract         ]
$extract = 0;                   # Extract Mode (suitable for inserting):
                                # when set, the closing </BODY> and
                                # </HTML> tags are not printed.
235
236# END OF CONFIGURABLE OPTIONS
237########################################
238
239
240########################################
241# Definitions (Don't change these)
242#
# Bit flags describing what has been done to a line.  They are OR-ed into
# $line_action (and copied to $previous_action); $LIST and $PRE are also
# kept in $mode, which persists from line to line.  Every non-zero value is
# a distinct power of two so that flags can be combined with | and tested
# with &.
$NONE = 0;
$LIST = 1;
$HRULE = 2;
$PAR = 4;
$PRE = 8;
$END = 16;
$BREAK = 32;
$HEADER = 64;
$MAILHEADER = 128;
$MAILQUOTE = 256;
$CAPS = 512;

# Kinds of list recorded in @list, one entry per open nesting level.
$OL = 1;        # ordered (numbered or lettered) list
$UL = 2;        # unordered (bulleted) list
257
# usage()
#   Prints the command-line synopsis to STDERR.
#   Side effect: strips the directory part from $0 so that only the
#   script's basename appears in the message.
#   The option list names --indentwidth, which is the long form that
#   deal_with_options() actually accepts for -iw.
sub usage
{
    $0 =~ s#.*/##;
    my $s = " " x length($0);   # blank padding as wide as the script name
    print STDERR <<EOF;

Usage: $0 [options]

where options are:
 $s [-v ] | [--version ]
 $s [-h ] | [--help ]
 $s [-s <n> ] | [--shortline <n> ]
 $s [-p <n> ] | [--prewhite <n> ]
 $s [-pb <n> ] | [--prebegin <n> ]
 $s [-pe <n> ] | [--preend <n> ]
 $s [-e/+e ] | [--extract / --noextract ]
 $s [-r <n> ] | [--hrule <n> ]
 $s [-c <n> ] | [--caps <n> ]
 $s [-ct <tag> ] | [--capstag <tag> ]
 $s [-m/+m ] | [--mail / --nomail ]
 $s [-u/+u ] | [--unhyphen / --nounhyphen ]
 $s [-a <file> ] | [--append <file> ]
 $s [+a ] | [--noappend ]
 $s [-t <title>] | [--title <title> ]
 $s [-tw <n> ] | [--tabwidth <n> ]
 $s [-iw <n> ] | [--indentwidth <n> ]
 $s [-ul <n> ] | [--underlinelong <n> ]
 $s [-us <n> ] | [--underlineshort <n> ]

 More complete explanations of these options can be found in
 comments near the beginning of the script.

EOF
}
292
293
# deal_with_options()
#   Consumes leading option arguments from @ARGV (anything starting with
#   '-' or '+' and at least two characters long) and stores their values in
#   the configuration globals.  Parsing stops at the first argument that
#   does not look like an option.  --version and --help print and exit; an
#   unknown option, or a known option with an unacceptable argument, prints
#   a diagnostic plus the usage text to STDERR and exits with status 1.
#   Afterwards the settings are clamped to the ranges the rest of the
#   script expects.
#
#   Fixes relative to the previous version:
#     * -r/--hrule validated its argument with /^%d+$/ and therefore never
#       accepted a number; it now uses \d like every other numeric option.
#     * -e/+e (--extract/--noextract) shifted @ARGV twice and so swallowed
#       the argument that followed them.
#     * --indent (the spelling shown by the usage text) is accepted as an
#       alias for --indentwidth.
#     * A tab width below 1 is raised to 1; untabify() takes a modulus by
#       $tab_width and would die on zero.
#     * The version string no longer has a line break inside "1.10", and
#       "paramater" is spelled correctly in the error message.
sub deal_with_options
{
    # Options that take a non-negative integer: option => setting
    my %numeric_setting = (
        '-r'  => \$hrule_min,                 '--hrule'          => \$hrule_min,
        '-s'  => \$short_line_length,         '--shortline'      => \$short_line_length,
        '-p'  => \$preformat_whitespace_min,  '--prewhite'       => \$preformat_whitespace_min,
        '-pb' => \$preformat_trigger_lines,   '--prebegin'       => \$preformat_trigger_lines,
        '-pe' => \$endpreformat_trigger_lines, '--preend'        => \$endpreformat_trigger_lines,
        '-c'  => \$min_caps_length,           '--caps'           => \$min_caps_length,
        '-ul' => \$underline_tolerance_long,  '--underlinelong'  => \$underline_tolerance_long,
        '-us' => \$underline_tolerance_short, '--underlineshort' => \$underline_tolerance_short,
        '-tw' => \$tab_width,                 '--tabwidth'       => \$tab_width,
        '-iw' => \$indent_width,              '--indentwidth'    => \$indent_width,
        '--indent' => \$indent_width,
    );

    # Options that take any true (non-empty, non-"0") string: option => setting
    my %string_setting = (
        '-ct' => \$caps_tag, '--capstag' => \$caps_tag,
        '-t'  => \$title,    '--title'   => \$title,
    );

    # Options without an argument: option => [ setting, value to store ]
    my %switch = (
        '-e' => [ \$extract, 1 ],       '--extract'    => [ \$extract, 1 ],
        '+e' => [ \$extract, 0 ],       '--noextract'  => [ \$extract, 0 ],
        '-m' => [ \$mailmode, 1 ],      '--mail'       => [ \$mailmode, 1 ],
        '+m' => [ \$mailmode, 0 ],      '--nomail'     => [ \$mailmode, 0 ],
        '-u' => [ \$unhyphenation, 1 ], '--unhyphen'   => [ \$unhyphenation, 1 ],
        '+u' => [ \$unhyphenation, 0 ], '--nounhyphen' => [ \$unhyphenation, 0 ],
        '+a' => [ \$append_file, 0 ],   '--noappend'   => [ \$append_file, 0 ],
    );

    while (defined $ARGV[0] && $ARGV[0] =~ /^[-+].+/)
    {
        my $option = shift @ARGV;
        my $value  = $ARGV[0];      # candidate argument, possibly undefined

        if ($switch{$option})
        {
            my ($setting, $state) = @{ $switch{$option} };
            $$setting = $state;
            next;
        }

        if ($numeric_setting{$option} && defined $value && $value =~ /^\d+$/)
        {
            ${ $numeric_setting{$option} } = shift @ARGV;
            next;
        }

        if ($string_setting{$option} && $value)
        {
            ${ $string_setting{$option} } = shift @ARGV;
            next;
        }

        if (($option eq "-a" || $option eq "--append") && $value)
        {
            # An unreadable file is reported but does not abort the run;
            # the argument is consumed either way.
            if (-r $value) {
                $append_file = $value;
            } else {
                print STDERR "Can't find or read $value.\n";
            }
            shift @ARGV;
            next;
        }

        if ($option eq "-v" || $option eq "--version")
        {
            print '$Header: /users/hilco/seth/projects/txt2html/txt2html.pl,v 1.10 1994/12/28 20:10:25 seth Exp seth $ ';
            print "\n";
            exit;
        }

        if ($option eq "-h" || $option eq "--help")
        {
            usage();
            exit;
        }

        print STDERR "Unrecognized option: $option\n";
        print STDERR " or bad parameter: $value\n" if ($value);

        usage();
        exit(1);
    }

    # Only 0, 1 and 2 are meaningful for the two trigger settings.
    $preformat_trigger_lines = 0 if ($preformat_trigger_lines < 0);
    $preformat_trigger_lines = 2 if ($preformat_trigger_lines > 2);

    # "Preformat everything" wins: main() only calls preformat() while the
    # end trigger is non-zero, so force it to a non-zero value here.
    $endpreformat_trigger_lines = 1 if ($preformat_trigger_lines == 0);
    $endpreformat_trigger_lines = 0 if ($endpreformat_trigger_lines < 0);
    $endpreformat_trigger_lines = 2 if ($endpreformat_trigger_lines > 2);

    $underline_tolerance_long  = 0 if $underline_tolerance_long < 0;
    $underline_tolerance_short = 0 if $underline_tolerance_short < 0;

    # untabify() computes $column % $tab_width, which dies for zero.
    $tab_width = 1 if $tab_width < 1;
}
485
# is_blank($text)
#   True when $text consists of nothing but whitespace.  An empty or
#   undefined value counts as blank too.
sub is_blank
{
    my ($text) = @_;
    return $text =~ /^\s*$/;
}
490
# escape()
#   Makes the global $line safe to embed in HTML: '&', '<' and '>' become
#   character entities.  A form feed (octal 014, the page-break character)
#   is then replaced by a horizontal rule on a line of its own.
sub escape
{
    my %entity_for = ('&' => '&amp;', '>' => '&gt;', '<' => '&lt;');

    # One pass is enough: text that has been substituted is not rescanned,
    # so the '&' of a freshly inserted entity is never escaped again.
    $line =~ s/([&<>])/$entity_for{$1}/g;

    # Done after escaping so that the inserted <HR> tag stays intact.
    $line =~ s/\014/\n<HR>\n/g;
}
498
# hrule()
#   Turns the global $line into <HR> when it consists solely of at least
#   $hrule_min rule characters (- _ ~ = *), optionally separated by
#   whitespace.  A paragraph tag already appended to $prev is removed and
#   $HRULE is recorded in $line_action.
sub hrule
{
    return unless $line =~ /^\s*([-_~=\*]\s*){$hrule_min,}$/;

    $prev =~ s/<p>//;
    $line_action |= $HRULE;
    $line = "<HR>\n";
}
508
# shortline()
#   Appends <BR> to the global $line when the line looks deliberately
#   short: we are outside <PRE>, neither this line nor the next one is
#   blank, its length before tagging ($line_length) is below
#   $short_line_length, and it has not already been handled as a header,
#   rule, break or list item.  $BREAK is recorded in $line_action.
sub shortline
{
    return if $mode & $PRE;
    return if is_blank($line);
    return unless $line_length < $short_line_length;
    return if is_blank($nextline);
    return if $line_action & ($HEADER | $HRULE | $BREAK | $LIST);

    $line_action |= $BREAK;
    $line =~ s/$/<BR>/;
}
521
# mailstuff()
#   Mail-mode handling for the global $line.  Three kinds of line get a
#   trailing <BR>, provided the following line is not blank:
#     * quoted text such as "FF> Werewolves." or "Igor| There wolves."
#       (the '>' has already been escaped to &gt; at this point),
#     * a "Some-Header: blah" line that follows a blank line or another
#       mail header; the first header of a group also gets an anchor,
#     * an indented continuation of a mail header.
#   The matching flags are recorded in $line_action.
sub mailstuff
{
    # Every case below needs a non-blank line to follow.
    return if is_blank($nextline);

    my $after_header = $previous_action & $MAILHEADER;
    my $kind;

    if (($line =~ /^\w*&gt/) || ($line =~ /^\w*\|/))
    {
        $kind = $MAILQUOTE;
    }
    elsif (($line =~ /^[\w\-]*:/) && ($after_header || is_blank($prev)))
    {
        anchor_mail() if !$after_header;
        $kind = $MAILHEADER;
    }
    elsif (($line =~ /^\s+\S/) && $after_header)
    {
        $kind = $MAILHEADER;
    }
    else
    {
        return;
    }

    $line =~ s/$/<BR>/;
    $line_action |= $BREAK | $kind;
}
545
# paragraph()
#   Starts a new paragraph in front of the current line by appending a
#   <p> tag to the pending output in $prev, and records $PAR in
#   $line_action.
sub paragraph
{
    $line_action = $line_action | $PAR;
    $prev        = $prev . "<p>\n";
}
551
# listprefix($text)
#   Examines $text for a list marker and returns ($prefix, $number,
#   $rawprefix):
#     $rawprefix  leading whitespace, the marker and the character after it
#     $number     the number or letter of an ordered item; false for bullets
#     $prefix     $rawprefix with the number/letter removed, so that all
#                 items of one ordered list share the same prefix
#   Returns (0, 0, 0) when the text does not start with a list marker.
sub listprefix
{
    my ($text) = @_;

    my $bulleted = $text =~ /^\s*[-=\*o]\s+\S/;
    my $numbered = $text =~ /^\s*(\d+|[a-zA-Z])[\.\)\]:]\s+\S/;
    return (0, 0, 0) unless $bulleted || $numbered;

    my ($number) = $text =~ /^\s*(\d+|[a-zA-Z])/;

    # That slippery exception of "o" as a bullet: it also looks like a
    # letter, so it is forced back to "not numbered" here.
    $number = 0 if $text =~ /^\s*o\s/;

    my ($prefix, $rawprefix);
    if ($number)
    {
        ($rawprefix) = $text =~ /^(\s*(\d+|[a-zA-Z]).)/;
        ($prefix = $rawprefix) =~ s/(\d+|[a-zA-Z])//;   # Take the number out
    }
    else
    {
        ($rawprefix) = $text =~ /^(\s*[-=o\*].)/;
        $prefix = $rawprefix;
    }

    return ($prefix, $number, $rawprefix);
}
581
# startlist($prefix, $number, $rawprefix)
#   Opens a new list one level below the currently open ones.  The prefix
#   is remembered in @listprefix, the opening tag is appended to $prev, the
#   kind of list is stored in @list, and $listnum, $list_indent, $mode and
#   $line_action are updated.
#   An ordered list is only opened when it starts with 1, a or A; for any
#   other number the prefix is recorded but nothing else happens.
sub startlist
{
    my ($prefix, $number, $rawprefix) = @_;

    $listprefix[$listnum] = $prefix;

    if (!$number)
    {
        $prev .= "$list_indent<font size=-2><UL>\n";
        $list[$listnum] = $UL;
    }
    else
    {
        # It doesn't start with 1,a,A.  Let's not screw with it.
        return unless ($number == 1) || ($number eq "a") || ($number eq "A");

        $prev .= "$list_indent<OL>\n";
        $list[$listnum] = $OL;
    }

    $listnum++;
    $list_indent  = " " x $listnum x $indent_width;
    $mode        |= $LIST;
    $line_action |= $LIST;
}
605
606
# endlist($n)
#   Closes the $n innermost open lists.  For each one the matching closing
#   tag is appended to $prev at that level's indentation and $listnum is
#   decremented.  $END is recorded in $line_action, and the $LIST bit is
#   cleared from $mode once no list remains open.
sub endlist
{
    my ($remaining) = @_;

    while ($remaining > 0)
    {
        my $kind = $list[$listnum - 1];
        $list_indent = " " x ($listnum - 1) x $indent_width;

        if ($kind == $UL)
        {
            $prev .= "$list_indent</UL></font>\n";
        }
        elsif ($kind == $OL)
        {
            $prev .= "$list_indent</OL>\n";
        }
        else
        {
            print STDERR "Encountered list of unknown type\n";
        }

        $remaining--;
        $listnum--;
    }

    $line_action |= $END;
    $mode &= ~$LIST unless $listnum;
}
627
628sub continuelist
629{
630 $line =~ s/^\s*[-=o\*]\s*/$list_indent<LI> / if $list[$listnum-1] == $UL;
631 $line =~ s/^\s*(\d+|[a-zA-Z]).\s*/$list_indent<LI> / if $list[$listnum-1
632] == $OL;
633 $line_action |= $LIST;
634}
635
# liststuff()
#   Decides, for the global $line, whether to open a new list, close one or
#   more open lists, or simply carry on with the innermost one.  Reads
#   $prev, $mode, $listnum and @listprefix, and delegates the actual output
#   to startlist(), endlist() and continuelist().
sub liststuff
{
    local($i);

    local($prefix, $number, $rawprefix) = &listprefix($line);

    $i = $listnum;                      # number of lists currently open
    if (!$prefix)
    {
        # No list marker on this line.
        return if !&is_blank($prev);    # inside a list item

        # This ain't no list.  We'll want to end all of them.
        return if !($mode & $LIST);     # This just speeds up the inevitable
        $i = 0;
    } else
    {
        # Maybe we're going back up to a previous list.
        # Walk outwards until an open list with the same prefix is found;
        # $i ends up as the depth of that list.
        # NOTE(review): once $i reaches 0 the comparison reads
        # $listprefix[-1] (the last element), and $i becomes -1 when nothing
        # matches -- confirm that both are intended.
        $i-- while (($prefix ne $listprefix[$i-1]) && ($i >= 0));
    }

    if (($i >= 0) && ($i != $listnum))
    {
        # Close every list nested deeper than level $i.
        &endlist($listnum - $i);
    } elsif (!$listnum || $i != $listnum)
    {
        # No list is open, or no open list matched this prefix.
        &startlist($prefix, $number, $rawprefix);
    }

    # startlist() may decline to open a list, so check the mode again.
    &continuelist($prefix, $number, $rawprefix) if ($mode & $LIST);
}
666
# endpreformat()
#   Leaves preformatted mode when the global $line no longer contains a
#   run of at least $preformat_whitespace_min whitespace characters followed
#   by text and, unless a single line is enough
#   ($endpreformat_trigger_lines == 1), neither does $nextline.  The closing
#   tags are appended to $prev, the $PRE bit is cleared from $mode, and
#   $END is recorded in $line_action.
sub endpreformat
{
    return if $line =~ /\s{$preformat_whitespace_min,}\S+/;

    if ($endpreformat_trigger_lines != 1)
    {
        return if $nextline =~ /\s{$preformat_whitespace_min,}\S+/;
    }

    $prev =~ s#$#\n</PRE></font>#;
    $mode ^= ($PRE & $mode);
    $line_action |= $END;
}
678
# preformat()
#   Enters preformatted mode.  This happens unconditionally when
#   $preformat_trigger_lines is 0; otherwise the global $line must contain
#   a run of at least $preformat_whitespace_min whitespace characters
#   followed by text, and so must $nextline unless a single line is enough
#   ($preformat_trigger_lines == 1).  The opening tags are put in front of
#   $line, a pending <p> is removed from $prev, and $PRE is recorded in
#   both $mode and $line_action.
sub preformat
{
    if ($preformat_trigger_lines != 0)
    {
        return unless $line =~ /\s{$preformat_whitespace_min,}\S+/;
        return unless $preformat_trigger_lines == 1
                   || $nextline =~ /\s{$preformat_whitespace_min,}\S+/;
    }

    $prev =~ s/<p>//;
    $line =~ s/^/<font size=-1><PRE>\n/;
    $line_action |= $PRE;
    $mode |= $PRE;
}
692
# make_new_anchor()
#   Returns the next anchor name.  Names are simply consecutive numbers
#   kept in the global counter $anchor; any arguments are ignored.
sub make_new_anchor
{
    return ++$anchor;
}
698
# anchor_mail()
#   Wraps the first line of the global $line (a mail header) in a named
#   anchor.  The header's value is extracted and handed to
#   make_new_anchor(), which supplies the anchor name.
sub anchor_mail
{
    my ($header_value) = $line =~ /\S+: *(.*) *$/;
    my $name = make_new_anchor($header_value);
    $line =~ s/(.*)/<A NAME="$name">$1<\/A>/;
}
705
# anchor_heading($heading)
#   Wraps the already tagged heading (<Hn>...</Hn>) in the global $line in
#   a named anchor.  The heading text is handed to make_new_anchor(), which
#   supplies the anchor name.
sub anchor_heading
{
    my ($text) = @_;
    my $name = make_new_anchor($text);
    $line =~ s/(<H.>.*<\/H.>)/<A NAME="$name">$1<\/A>/;
}
712
# heading()
#   Treats the global $line as a heading when $nextline underlines it.
#   The underline may be at most $underline_tolerance_short characters
#   shorter and $underline_tolerance_long characters longer than the
#   heading text.  The first underline character selects the level:
#       *  H1     =  H2     +  H3     -  H4     ~  H5     .  H6
#   On success the underline is consumed from STDIN, $line is tagged and
#   anchored, and $HEADER is recorded in $line_action.
sub heading
{
    # The indentation capture is not used yet; only the text matters.
    my (undef, $heading) = $line =~ /^(\s*)(.+)$/;

    # Trailing whitespace has already been removed in main().

    my ($underline) = $nextline =~ /^\s*(\S+)\s*$/;

    my $text_length = length($heading);
    my $rule_length = length($underline);
    return if $text_length > ($rule_length + $underline_tolerance_short);
    return if $text_length < ($rule_length - $underline_tolerance_long);

    my %level_for = (
        "*" => 1,
        "=" => 2,
        "+" => 3,
        "-" => 4,
        "~" => 5,
        "." => 6,
    );
    my $hlevel = $level_for{ substr($underline, 0, 1) };
    return if !$hlevel;

    $nextline = <STDIN>;        # Eat the underline
    tagline("H${hlevel}");
    anchor_heading($heading);
    $line_action |= $HEADER;
}
748
# unhyphenate()
#   Rejoins a word that was hyphenated across a line break.  The first
#   word of $nextline, together with any punctuation and whitespace that
#   follows it, is moved to the end of $line in place of the trailing
#   hyphen.  Leading whitespace of $nextline is preserved so that guesses
#   based on indentation still work.  $line always ends in a newline
#   afterwards.
sub unhyphenate
{
    # Grab the continuation first, then cut it out of the next line.
    my ($continuation) = $nextline =~ /^\s*([a-zA-Z]+[\)\}\]\.,:;\'\"\>]*\s*)/;
    $nextline =~ s/^(\s*)[a-zA-Z]+[\)\}\]\.,:;\'\"\>]*\s*/$1/;

    # The substitution also swallows the newline after the hyphen, so it
    # is put back explicitly.
    $line =~ s/\-\s*$/$continuation/;
    $line .= "\n";
}
765
# untabify()
#   Replaces the leading run of spaces and tabs of the global $line by the
#   equivalent number of spaces.  A tab advances to the next multiple of
#   $tab_width.  Whitespace after the first other character is untouched.
sub untabify
{
    my ($leading) = $line =~ /^([ \011]+)/;
    my $leading_length = length($leading);

    my $column = 0;
    for my $char (split //, $leading)
    {
        if ($char eq " ")
        {
            $column++;                                          # Space
        }
        else
        {
            $column += $tab_width - ($column % $tab_width);     # Tab
        }
    }

    $line = (" " x $column) . substr($line, $leading_length);
}
783
# tagline($tag)
#   Encloses the text of the global $line in <$tag>...</$tag>.  Leading
#   whitespace is dropped and the result ends in a single newline.
sub tagline
{
    my ($tag) = @_;
    my ($open, $close) = ("<$tag>", "</$tag>");
    $line =~ s/^\s*(.*)\s*$/$open$1$close\n/;
}
789
# caps()
#   Tags the global $line with $caps_tag when it contains no lower-case
#   letter and no '<', and has a run of at least $min_caps_length
#   consecutive capital letters.  $CAPS is recorded in $line_action.
sub caps
{
    return unless $line =~ /^[^a-z<]*[A-Z]{$min_caps_length,}[^a-z<]*$/;

    tagline($caps_tag);
    $line_action |= $CAPS;
}
798
799
800
# main()
#   Converts plain text read from STDIN into HTML on STDOUT.
#     1. Parses the command line and prints the fixed page header.
#     2. Runs every line through the formatting passes.  Three lines are
#        in flight at any time: $prev (finished, printed at the end of each
#        round), $line (being processed) and $nextline (look-ahead).
#     3. Closes any lists or <PRE> block still open.
#     4. Prints either the file named by $append_file or the default
#        footer, then the closing tags unless extract mode is on.
#
#   The append file is opened with a checked three-argument open on a
#   lexical handle and is closed after use.  Previously a failed open was
#   silently ignored, and the file name could be misread as an open mode.
sub main
{
    &deal_with_options;

    # NOTE(review): this header is printed even in extract mode, while the
    # closing </BODY></HTML> further down are skipped in that mode --
    # confirm that the asymmetry is intended.
    if(1)
    {
        print q(
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
 "http://www.w3.org/TR/html4/loose.dtd">
<html lang="en-US">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
<meta name="Description" content="ImageMagick - a robust collection of tools and libraries to read, write and manipulate an image in any of the popular image formats. ImageMagick allows dynamic creation of GIFs, making it suitable for Web applications.">
<meta name="Keywords" content="ImageMagick,Image Magick,Image Magic,PerlMagick,Perl Magick,Perl Magic,WebMagick,Web Magic,image processing,software development,simulation,image software,AniMagick,Animagic,Magick++">
<meta name="Resource-type" content="document">
<meta name="Robots" content="ALL">
<link rel="stylesheet" type="text/css" href="../www/magick.css">
</head>

<body marginheight=1 marginwidth=1 topmargin=1 leftmargin=1>
<a name="top"></a>
<table border="0" cellpadding="0" cellspacing="0" summary="Masthead" width="100%">
<tbody>
<tr>
<td bgcolor="#003399" width="25%" height="118" background="../images/background.gif"><a href="http://www.imagemagick.org/"><img src="../images/script.gif" width="278" height="118" border="0" alt="" /></a></td>
<td bgcolor="#003399" width="60%" height="118" background="../images/background.gif"><a href="http://www.networkeleven.com/direct.php?magick_all"><img src="../images/promote.png" border="0" width="186" height="52" vspace="29" alt="Powered by NetworkEleven" /></a></td>
<td bgcolor="#003399" width="114" height="118" align="right"><img src="../images/sprite.png" width="114" height="118" alt="" /></td>
<td bgcolor="#003399" width="114" height="118" align="right"><a href="http://www.imagemagick.net"><img src="../images/logo.png" width="114" height="118" border="0" alt="ImageMagick logo" /></a></td>
</tr></tbody></table>
</table><table align="left" border=0 cellpadding=2 cellspacing=2 summary="Navigation buttons" width="20%">
 <tr>
 <td>
 <form target="_self" action="../index.html"><input type="submit" title="ImageMagick Home" value=" Home " style="background-image:url('../images/background.gif'); color:#fbc713; font-weight:bold"></form>
 </td><td>
 <form target="_self" action="../www/apis.html"><input type="submit" title="ImageMagick API" value=" API " style="background-image:url('../images/background.gif'); color:#fbc713; font-weight:bold"></form>
 </td><td>
 <form target="_self" action="../www/archives.html"><input type="submit" title="ImageMagick Download" value="Download" style="background-image:url('../images/background.gif'); color:#fbc713; font-weight:bold"></form>
 </td>
 </tr>
</table>
<div align="right" style="margin-top:3px; padding-right:4px">
 <form action="http://studio.imagemagick.org/Sage/scripts/Sage.cgi">
 <input type="TEXT" name="query" size=32 maxlength=255>
 <input type="SUBMIT" name="sa" value="Search" style="background-image:url('../images/background.gif'); bgcolor:#003399; color:#fbc713; font-weight:bold">
 </form><br>
</div>
<table align="left" border=0 cellpadding=10 cellspacing=0 style="margin-top:-17px" width="100%"><tr><td>

<br>&nbsp;<br>
) . "\n";
        print "<HTML>\n";
        print "<HEAD>\n";

        # It'd be nice if we could guess a title from the first header,
        # but even that would be too late if we're doing this in one pass.
        print "<TITLE>$title</TITLE>\n" if($title);

        print "</HEAD>\n";

        print q(<body text="#000000" bgcolor="#fbc713" link="#1F00FF" vlink="#9900DD" alink="#FF0000">) . "\n";

        if ($title) {
            print "<h3>$title</h3>\n";
        }
    }

    $prev = "";
    $line = <STDIN>;
    $nextline = <STDIN>;
    do
    {
        $line =~ s/[ \011]*$//;         # Chop trailing whitespace

        &untabify;                      # Change leading whitespace into spaces

        $line_length = length($line);   # Do this before tags go in

        &escape;

        &endpreformat if (($mode & $PRE) && ($preformat_trigger_lines != 0));

        &hrule if !($mode & $PRE);

        &heading if (!($mode & $PRE) &&
                     $nextline =~ /^\s*[=\-\*\.~\+]+$/);

        &caps if !($mode & $PRE);

        &liststuff if (!($mode & $PRE) &&
                       !&is_blank($line));

        &mailstuff if ($mailmode &&
                       !($mode & $PRE) &&
                       !($line_action & $HEADER));

        &preformat if (!($line_action & ($HEADER | $LIST | $MAILHEADER)) &&
                       !($mode & ($LIST | $PRE)) &&
                       ($endpreformat_trigger_lines != 0));

        &paragraph if ((&is_blank($prev) || ($line_action & $END)) &&
                       !&is_blank($line) &&
                       !($mode & ($LIST | $PRE)) && # paragraphs in lists
                                                    # *should* be allowed.
                       (!$line_action ||
                        ($line_action & ($CAPS | $END | $MAILQUOTE))));

        &shortline;

        &unhyphenate if ($unhyphenation &&
                         ($line =~ /[a-zA-Z]\-$/) &&    # ends in hyphen
                         # next line starts w/letters
                         ($nextline =~ /^\s*[a-zA-Z]/) &&
                         !($mode & ($PRE | $HEADER | $MAILHEADER | $BREAK)));


        # Print it out and move on.

        print $prev;

        # Across a blank line the actions of the current line stay in
        # effect, so they are only rotated when real text follows.
        if (!&is_blank($nextline))
        {
            $previous_action = $line_action;
            $line_action = $NONE;
        }

        $prev = $line;
        $line = $nextline;
        $nextline = <STDIN>;
    } until (!$nextline && !$line && !$prev);

    $prev = "";
    &endlist($listnum) if ($mode & $LIST);     # End all lists
    print $prev;

    print "\n";

    print "</PRE></font>\n" if ($mode & $PRE);

    if ($append_file)
    {
        # The file is copied through verbatim, without any processing.
        if (-r $append_file && open(my $append_fh, '<', $append_file))
        {
            print while <$append_fh>;
            close($append_fh);
        } else {
            print STDERR "Can't find or read file $append_file to append.\n";
        }
    } else {
        print q(<hr>) . "\n";
        print q(
<a href="#top"><img src="../images/top.gif" border=0 width="35" height="46" align="right" alt="Top of page"></a>
<form action="http://studio.imagemagick.org/magick/" style="margin-top:5px">
 <input type="submit" title="Help!" value="Help!" style="background-image:url('../images/background.gif'); color:#fbc713; font-weight:bold">
 <small>&quot;Image manipulation software that works like magick&quot;</small>
 </form></td>
 </tr></table>
) . "\n";
    }

    if(!$extract)
    {
        print "</BODY>\n";
        print "</HTML>\n";
    }
}
966
# Script entry point: main() reads the text from STDIN and prints the
# resulting HTML to STDOUT.
&main();
968
969