Annotation of embedaddon/pcre/132html, revision 1.1.1.2

1.1       misho       1: #! /usr/bin/perl -w
                      2: 
                      3: # Script to turn PCRE man pages into HTML
                      4: 
                      5: 
                      6: # Subroutine to handle font changes and other escapes
                      7: 
                      8: sub do_line {
                      9: my($s) = $_[0];
                     10: 
                     11: $s =~ s/</&#60;/g;                   # Deal with < and >
                     12: $s =~ s/>/&#62;/g;
                     13: $s =~ s"\\fI(.*?)\\f[RP]"<i>$1</i>"g;
                     14: $s =~ s"\\fB(.*?)\\f[RP]"<b>$1</b>"g;
                     15: $s =~ s"\\e"\\"g;
                     16: $s =~ s/(?<=Copyright )\(c\)/&copy;/g;
                     17: $s;
                     18: }
                     19: 
                     20: # Subroutine to ensure not in a paragraph
                     21: 
                     22: sub end_para {
                     23: if ($inpara)
                     24:   {
                     25:   print TEMP "</PRE>\n" if ($inpre);
                     26:   print TEMP "</P>\n";
                     27:   }
                     28: $inpara = $inpre = 0;
                     29: $wrotetext = 0;
                     30: }
                     31: 
                     32: # Subroutine to start a new paragraph
                     33: 
                     34: sub new_para {
                     35: &end_para();
                     36: print TEMP "<P>\n";
                     37: $inpara = 1;
                     38: }
                     39: 
                     40: 
                     41: # Main program
                     42: 
                     43: $innf = 0;
                     44: $inpara = 0;
                     45: $inpre = 0;
                     46: $wrotetext = 0;
                     47: $toc = 0;
                     48: $ref = 1;
                     49: 
                     50: while ($#ARGV >= 0 && $ARGV[0] =~ /^-/)
                     51:   {
                     52:   $toc = 1 if $ARGV[0] eq "-toc";
                     53:   shift;
                     54:   }
                     55: 
                     56: # Initial output to STDOUT
                     57: 
                     58: print <<End ;
                     59: <html>
                     60: <head>
                     61: <title>$ARGV[0] specification</title>
                     62: </head>
                     63: <body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
                     64: <h1>$ARGV[0] man page</h1>
                     65: <p>
                     66: Return to the <a href="index.html">PCRE index page</a>.
                     67: </p>
                     68: <p>
                     69: This page is part of the PCRE HTML documentation. It was generated automatically
                     70: from the original man page. If there is any nonsense in it, please consult the
                     71: man page, in case the conversion went wrong.
                     72: <br>
                     73: End
                     74: 
                     75: print "<ul>\n" if ($toc);
                     76: 
                     77: open(TEMP, ">/tmp/$$") || die "Can't open /tmp/$$ for output\n";
                     78: 
                     79: while (<STDIN>)
                     80:   {
                     81:   # Handle lines beginning with a dot
                     82: 
                     83:   if (/^\./)
                     84:     {
                     85:     # Some of the PCRE man pages used to contain instances of .br. However,
                     86:     # they should have all been removed because they cause trouble in some
                     87:     # (other) automated systems that translate man pages to HTML. Complain if
                     88:     # we find .br or .in (another macro that is deprecated).
                     89: 
                     90:     if (/^\.br/ || /^\.in/)
                     91:       {
                     92:       print STDERR "\n*** Deprecated macro encountered - rewrite needed\n";
                     93:       print STDERR "*** $_\n";
                     94:       die "*** Processing abandoned\n";
                     95:       }
                     96: 
                     97:     # Instead of .br, relevent "literal" sections are enclosed in .nf/.fi.
                     98: 
                     99:     elsif (/^\.nf/)
                    100:       {
                    101:       $innf = 1;
                    102:       }
                    103: 
                    104:     elsif (/^\.fi/)
                    105:       {
                    106:       $innf = 0;
                    107:       }
                    108: 
                    109:     # Handling .sp is subtle. If it is inside a literal section, do nothing if
                    110:     # the next line is a non literal text line; similarly, if not inside a
1.1.1.2 ! misho     111:     # literal section, do nothing if a literal follows, unless we are inside
        !           112:     # a .nf/.ne section. The point being that the <pre> and </pre> that delimit
        !           113:     # literal sections will do the spacing. Always skip if no previous output.
1.1       misho     114: 
                    115:     elsif (/^\.sp/)
                    116:       {
                    117:       if ($wrotetext)
                    118:         {
                    119:         $_ = <STDIN>;
                    120:         if ($inpre)
                    121:           {
                    122:           print TEMP "\n" if (/^[\s.]/);
                    123:           }
                    124:         else
                    125:           {
1.1.1.2 ! misho     126:           print TEMP "<br>\n<br>\n" if ($innf || !/^[\s.]/);
1.1       misho     127:           }
                    128:         redo;    # Now process the lookahead line we just read
                    129:         }
                    130:       }
                    131:     elsif (/^\.TP/ || /^\.PP/ || /^\.P/)
                    132:       {
                    133:       &new_para();
                    134:       }
                    135:     elsif (/^\.SH\s*("?)(.*)\1/)
                    136:       {
                    137:       # Ignore the NAME section
                    138:       if ($2 =~ /^NAME\b/)
                    139:         {
                    140:         <STDIN>;
                    141:         next;
                    142:         }
                    143: 
                    144:       &end_para();
                    145:       my($title) = &do_line($2);
                    146:       if ($toc)
                    147:         {
                    148:         printf("<li><a name=\"TOC%d\" href=\"#SEC%d\">$title</a>\n",
                    149:           $ref, $ref);
                    150:         printf TEMP ("<br><a name=\"SEC%d\" href=\"#TOC1\">$title</a><br>\n",
                    151:           $ref, $ref);
                    152:         $ref++;
                    153:         }
                    154:       else
                    155:         {
                    156:         print TEMP "<br><b>\n$title\n</b><br>\n";
                    157:         }
                    158:       }
                    159:     elsif (/^\.SS\s*("?)(.*)\1/)
                    160:       {
                    161:       &end_para();
                    162:       my($title) = &do_line($2);
                    163:       print TEMP "<br><b>\n$title\n</b><br>\n";
                    164:       }
                    165:     elsif (/^\.B\s*(.*)/)
                    166:       {
                    167:       &new_para() if (!$inpara);
                    168:       $_ = &do_line($1);
                    169:       s/"(.*?)"/$1/g;
                    170:       print TEMP "<b>$_</b>\n";
                    171:       $wrotetext = 1;
                    172:       }
                    173:     elsif (/^\.I\s*(.*)/)
                    174:       {
                    175:       &new_para() if (!$inpara);
                    176:       $_ = &do_line($1);
                    177:       s/"(.*?)"/$1/g;
                    178:       print TEMP "<i>$_</i>\n";
                    179:       $wrotetext = 1;
                    180:       }
                    181: 
                    182:     # A comment that starts "HREF" takes the next line as a name that
                    183:     # is turned into a hyperlink, using the text given, which might be
                    184:     # in a special font. If it ends in () or (digits) or punctuation, they
                    185:     # aren't part of the link.
                    186: 
                    187:     elsif (/^\.\\"\s*HREF/)
                    188:       {
                    189:       $_=<STDIN>;
                    190:       chomp;
                    191:       $_ = &do_line($_);
                    192:       $_ =~ s/\s+$//;
                    193:       $_ =~ /^(?:<.>)?([^<(]+)(?:\(\))?(?:<\/.>)?(?:\(\d+\))?[.,;:]?$/;
                    194:       print TEMP "<a href=\"$1.html\">$_</a>\n";
                    195:       }
                    196: 
                    197:     # A comment that starts "HTML" inserts literal HTML
                    198: 
                    199:     elsif (/^\.\\"\s*HTML\s*(.*)/)
                    200:       {
                    201:       print TEMP $1;
                    202:       }
                    203: 
                    204:     # A comment that starts < inserts that HTML at the end of the
                    205:     # *next* input line - so as not to get a newline between them.
                    206: 
                    207:     elsif (/^\.\\"\s*(<.*>)/)
                    208:       {
                    209:       my($markup) = $1;
                    210:       $_=<STDIN>;
                    211:       chomp;
                    212:       $_ = &do_line($_);
                    213:       $_ =~ s/\s+$//;
                    214:       print TEMP "$_$markup\n";
                    215:       }
                    216: 
                    217:     # A comment that starts JOIN joins the next two lines together, with one
                    218:     # space between them. Then that line is processed. This is used in some
                    219:     # displays where two lines are needed for the "man" version. JOINSH works
                    220:     # the same, except that it assumes this is a shell command, so removes
                    221:     # continuation backslashes.
                    222: 
                    223:     elsif (/^\.\\"\s*JOIN(SH)?/)
                    224:       {
                    225:       my($one,$two);
                    226:       $one = <STDIN>;
                    227:       $two = <STDIN>;
                    228:       $one =~ s/\s*\\e\s*$// if (defined($1));
                    229:       chomp($one);
                    230:       $two =~ s/^\s+//;
                    231:       $_ = "$one $two";
                    232:       redo;            # Process the joined lines
                    233:       }
                    234: 
                    235:     # .EX/.EE are used in the pcredemo page to bracket the entire program,
                    236:     # which is unmodified except for turning backslash into "\e".
                    237: 
                    238:     elsif (/^\.EX\s*$/)
                    239:       {
                    240:       print TEMP "<PRE>\n";
                    241:       while (<STDIN>)
                    242:         {
                    243:         last if /^\.EE\s*$/;
                    244:         s/\\e/\\/g;
                    245:         s/&/&amp;/g;
                    246:         s/</&lt;/g;
                    247:         s/>/&gt;/g;
                    248:         print TEMP;
                    249:         }
                    250:       }
                    251: 
                    252:     # Ignore anything not recognized
                    253: 
                    254:     next;
                    255:     }
                    256: 
                    257:   # Line does not begin with a dot. Replace blank lines with new paragraphs
                    258: 
                    259:   if (/^\s*$/)
                    260:     {
                    261:     &end_para() if ($wrotetext);
                    262:     next;
                    263:     }
                    264: 
                    265:   # Convert fonts changes and output an ordinary line. Ensure that indented
                    266:   # lines are marked as literal.
                    267: 
                    268:   $_ = &do_line($_);
                    269:   &new_para() if (!$inpara);
                    270: 
                    271:   if (/^\s/)
                    272:     {
                    273:     if (!$inpre)
                    274:       {
                    275:       print TEMP "<pre>\n";
                    276:       $inpre = 1;
                    277:       }
                    278:     }
                    279:   elsif ($inpre)
                    280:     {
                    281:     print TEMP "</pre>\n";
                    282:     $inpre = 0;
                    283:     }
                    284: 
                    285:   # Add <br> to the end of a non-literal line if we are within .nf/.fi
                    286: 
                    287:   $_ .= "<br>\n" if (!$inpre && $innf);
                    288: 
                    289:   print TEMP;
                    290:   $wrotetext = 1;
                    291:   }
                    292: 
                    293: # The TOC, if present, will have been written - terminate it
                    294: 
                    295: print "</ul>\n" if ($toc);
                    296: 
                    297: # Copy the remainder to the standard output
                    298: 
                    299: close(TEMP);
                    300: open(TEMP, "/tmp/$$") || die "Can't open /tmp/$$ for input\n";
                    301: 
                    302: print while (<TEMP>);
                    303: 
                    304: print <<End ;
                    305: <p>
                    306: Return to the <a href="index.html">PCRE index page</a>.
                    307: </p>
                    308: End
                    309: 
                    310: close(TEMP);
                    311: unlink("/tmp/$$");
                    312: 
                    313: # End

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>