Annotation of embedaddon/pcre/132html, revision 1.1

1.1     ! misho       1: #! /usr/bin/perl -w
        !             2: 
        !             3: # Script to turn PCRE man pages into HTML
        !             4: 
        !             5: 
        !             6: # Subroutine to handle font changes and other escapes
        !             7: 
        !             8: sub do_line {
        !             9: my $text = shift;                    # Work on a copy of the argument
        !            10: # Order matters: < and > become entities before any real tags are added
        !            11: $text =~ s/</&#60;/g;
        !            12: $text =~ s/>/&#62;/g;
        !            13: $text =~ s"\\fI(.*?)\\f[RP]"<i>$1</i>"g;    # \fI ... \fR or \fP: italic
        !            14: $text =~ s"\\fB(.*?)\\f[RP]"<b>$1</b>"g;    # \fB ... \fR or \fP: bold
        !            15: $text =~ s"\\e"\\"g;                        # \e: a literal backslash
        !            16: $text =~ s/(?<=Copyright )\(c\)/&copy;/g;   # (c) after "Copyright "
        !            17: return $text;
        !            18: }
        !            19: 
        !            20: # Subroutine to ensure not in a paragraph
        !            21: 
        !            22: sub end_para {
        !            23: # Close whatever is still open, innermost element first
        !            24: print TEMP "</PRE>\n" if ($inpara && $inpre);
        !            25: print TEMP "</P>\n" if ($inpara);
        !            26: # Reset the state: no paragraph, no literal text, nothing written yet
        !            27: $inpara = 0;
        !            28: $inpre = 0;
        !            29: $wrotetext = 0;
        !            30: }
        !            31: 
        !            32: # Subroutine to start a new paragraph
        !            33: 
        !            34: sub new_para {
        !            35: end_para();                # Finish any paragraph that is still open
        !            36: print TEMP "<P>\n";
        !            37: $inpara = 1;
        !            38: }
        !            39: 
        !            40: 
        !            41: # Main program
        !            42: 
        !            43: # Flags, all clear at the start: inside .nf/.fi, inside a paragraph, inside
        !            44: # literal text, text written since the last paragraph break, -toc requested
        !            45: $innf = $inpara = $inpre = $wrotetext = $toc = 0;
        !            46: $ref = 1;        # Number for the anchors of the next section heading
        !            47: 
        !            48: # Take any leading options off the argument list; only -toc has an effect
        !            49: 
        !            50: while (@ARGV && $ARGV[0] =~ /^-/)
        !            51:   {
        !            52:   $toc = 1 if $ARGV[0] eq "-toc";
        !            53:   shift(@ARGV);
        !            54:   }
        !            55: 
        !            56: # Initial output to STDOUT
        !            57: 
        !            58: print <<End ;
        !            59: <html>
        !            60: <head>
        !            61: <title>$ARGV[0] specification</title>
        !            62: </head>
        !            63: <body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
        !            64: <h1>$ARGV[0] man page</h1>
        !            65: <p>
        !            66: Return to the <a href="index.html">PCRE index page</a>.
        !            67: </p>
        !            68: <p>
        !            69: This page is part of the PCRE HTML documentation. It was generated automatically
        !            70: from the original man page. If there is any nonsense in it, please consult the
        !            71: man page, in case the conversion went wrong.
        !            72: <br>
        !            73: End
        !            74: # With -toc, the list opened here receives one entry per .SH heading
        !            75: print "<ul>\n" if ($toc);
        !            76: # Body text is held in a scratch file, named after the process id, until the end
        !            77: open(TEMP, ">", "/tmp/$$") || die "Can't open /tmp/$$ for output: $!\n";
        !            78: 
        !            79: while (<STDIN>)
        !            80:   {
        !            81:   # Handle lines beginning with a dot
        !            82: 
        !            83:   if (/^\./)
        !            84:     {
        !            85:     # Some of the PCRE man pages used to contain instances of .br. However,
        !            86:     # they should have all been removed because they cause trouble in some
        !            87:     # (other) automated systems that translate man pages to HTML. Complain if
        !            88:     # we find .br or .in (another macro that is deprecated).
        !            89: 
        !            90:     if (/^\.br/ || /^\.in/)
        !            91:       {
        !            92:       print STDERR "\n*** Deprecated macro encountered - rewrite needed\n";
        !            93:       print STDERR "*** $_\n";
        !            94:       die "*** Processing abandoned\n";
        !            95:       }
        !            96: 
        !            97:     # Instead of .br, relevant "literal" sections are enclosed in .nf/.fi.
        !            98: 
        !            99:     elsif (/^\.nf/)
        !           100:       {
        !           101:       $innf = 1;
        !           102:       }
        !           103: 
        !           104:     elsif (/^\.fi/)
        !           105:       {
        !           106:       $innf = 0;
        !           107:       }
        !           108: 
        !           109:     # Handling .sp is subtle. If it is inside a literal section, do nothing if
        !           110:     # the next line is a non literal text line; similarly, if not inside a
        !           111:     # literal section, do nothing if a literal follows. The point being that
        !           112:     # the <pre> and </pre> that delimit literal sections will do the spacing.
        !           113:     # Always skip if no previous output.
        !           114: 
        !           115:     elsif (/^\.sp/)
        !           116:       {
        !           117:       if ($wrotetext)
        !           118:         {
        !           119:         $_ = <STDIN>;
        !           120:         if ($inpre)
        !           121:           {
        !           122:           print TEMP "\n" if (/^[\s.]/);
        !           123:           }
        !           124:         else
        !           125:           {
        !           126:           print TEMP "<br>\n<br>\n" if (!/^[\s.]/);
        !           127:           }
        !           128:         redo;    # Now process the lookahead line we just read
        !           129:         }
        !           130:       }
        !           131:     elsif (/^\.TP/ || /^\.PP/ || /^\.P/)
        !           132:       {
        !           133:       &new_para();
        !           134:       }
        !           135:     elsif (/^\.SH\s*("?)(.*)\1/)
        !           136:       {
        !           137:       # Ignore the NAME section
        !           138:       if ($2 =~ /^NAME\b/)
        !           139:         {
        !           140:         <STDIN>;
        !           141:         next;
        !           142:         }
        !           143:       # The title is passed to printf as data (%s), so a % in it is harmless
        !           144:       &end_para();
        !           145:       my($title) = &do_line($2);
        !           146:       if ($toc)
        !           147:         {
        !           148:         printf("<li><a name=\"TOC%d\" href=\"#SEC%d\">%s</a>\n",
        !           149:           $ref, $ref, $title);
        !           150:         printf TEMP ("<br><a name=\"SEC%d\" href=\"#TOC1\">%s</a><br>\n",
        !           151:           $ref, $title);
        !           152:         $ref++;
        !           153:         }
        !           154:       else
        !           155:         {
        !           156:         print TEMP "<br><b>\n$title\n</b><br>\n";
        !           157:         }
        !           158:       }
        !           159:     elsif (/^\.SS\s*("?)(.*)\1/)
        !           160:       {
        !           161:       &end_para();
        !           162:       my($title) = &do_line($2);
        !           163:       print TEMP "<br><b>\n$title\n</b><br>\n";
        !           164:       }
        !           165:     elsif (/^\.B\s*(.*)/)
        !           166:       {
        !           167:       &new_para() if (!$inpara);
        !           168:       $_ = &do_line($1);
        !           169:       s/"(.*?)"/$1/g;
        !           170:       print TEMP "<b>$_</b>\n";
        !           171:       $wrotetext = 1;
        !           172:       }
        !           173:     elsif (/^\.I\s*(.*)/)
        !           174:       {
        !           175:       &new_para() if (!$inpara);
        !           176:       $_ = &do_line($1);
        !           177:       s/"(.*?)"/$1/g;
        !           178:       print TEMP "<i>$_</i>\n";
        !           179:       $wrotetext = 1;
        !           180:       }
        !           181: 
        !           182:     # A comment that starts "HREF" takes the next line as a name that
        !           183:     # is turned into a hyperlink, using the text given, which might be
        !           184:     # in a special font. If it ends in () or (digits) or punctuation, they
        !           185:     # aren't part of the link.
        !           186: 
        !           187:     elsif (/^\.\\"\s*HREF/)
        !           188:       {
        !           189:       $_=<STDIN>;
        !           190:       chomp;
        !           191:       $_ = &do_line($_);
        !           192:       $_ =~ s/\s+$//;
        !           193:       $_ =~ /^(?:<.>)?([^<(]+)(?:\(\))?(?:<\/.>)?(?:\(\d+\))?[.,;:]?$/;
        !           194:       print TEMP "<a href=\"$1.html\">$_</a>\n";
        !           195:       }
        !           196: 
        !           197:     # A comment that starts "HTML" inserts literal HTML
        !           198: 
        !           199:     elsif (/^\.\\"\s*HTML\s*(.*)/)
        !           200:       {
        !           201:       print TEMP $1;
        !           202:       }
        !           203: 
        !           204:     # A comment that starts < inserts that HTML at the end of the
        !           205:     # *next* input line - so as not to get a newline between them.
        !           206: 
        !           207:     elsif (/^\.\\"\s*(<.*>)/)
        !           208:       {
        !           209:       my($markup) = $1;
        !           210:       $_=<STDIN>;
        !           211:       chomp;
        !           212:       $_ = &do_line($_);
        !           213:       $_ =~ s/\s+$//;
        !           214:       print TEMP "$_$markup\n";
        !           215:       }
        !           216: 
        !           217:     # A comment that starts JOIN joins the next two lines together, with one
        !           218:     # space between them. Then that line is processed. This is used in some
        !           219:     # displays where two lines are needed for the "man" version. JOINSH works
        !           220:     # the same, except that it assumes this is a shell command, so removes
        !           221:     # continuation backslashes.
        !           222: 
        !           223:     elsif (/^\.\\"\s*JOIN(SH)?/)
        !           224:       {
        !           225:       my($one,$two);
        !           226:       $one = <STDIN>;
        !           227:       $two = <STDIN>;
        !           228:       $one =~ s/\s*\\e\s*$// if (defined($1));
        !           229:       chomp($one);
        !           230:       $two =~ s/^\s+//;
        !           231:       $_ = "$one $two";
        !           232:       redo;            # Process the joined lines
        !           233:       }
        !           234: 
        !           235:     # .EX/.EE are used in the pcredemo page to bracket the entire program,
        !           236:     # which is unmodified except for turning backslash into "\e".
        !           237: 
        !           238:     elsif (/^\.EX\s*$/)
        !           239:       {
        !           240:       print TEMP "<PRE>\n";
        !           241:       while (<STDIN>)
        !           242:         {
        !           243:         last if /^\.EE\s*$/;
        !           244:         s/\\e/\\/g;
        !           245:         s/&/&amp;/g;
        !           246:         s/</&lt;/g; s/>/&gt;/g;
        !           247:         print TEMP;
        !           248:         }
        !           249:       print TEMP "</PRE>\n";    # Close the element opened before the loop
        !           250:       }
        !           251: 
        !           252:     # Ignore anything not recognized
        !           253: 
        !           254:     next;
        !           255:     }
        !           256: 
        !           257:   # Line does not begin with a dot. Replace blank lines with new paragraphs
        !           258: 
        !           259:   if (/^\s*$/)
        !           260:     {
        !           261:     &end_para() if ($wrotetext);
        !           262:     next;
        !           263:     }
        !           264: 
        !           265:   # Convert fonts changes and output an ordinary line. Ensure that indented
        !           266:   # lines are marked as literal.
        !           267: 
        !           268:   $_ = &do_line($_);
        !           269:   &new_para() if (!$inpara);
        !           270: 
        !           271:   if (/^\s/)
        !           272:     {
        !           273:     if (!$inpre)
        !           274:       {
        !           275:       print TEMP "<pre>\n";
        !           276:       $inpre = 1;
        !           277:       }
        !           278:     }
        !           279:   elsif ($inpre)
        !           280:     {
        !           281:     print TEMP "</pre>\n";
        !           282:     $inpre = 0;
        !           283:     }
        !           284: 
        !           285:   # Add <br> to the end of a non-literal line if we are within .nf/.fi
        !           286: 
        !           287:   $_ .= "<br>\n" if (!$inpre && $innf);
        !           288: 
        !           289:   print TEMP;
        !           290:   $wrotetext = 1;
        !           291:   }
        !           292: 
        !           293: # The TOC, if present, will have been written - terminate it
        !           294: 
        !           295: print "</ul>\n" if ($toc);
        !           296: 
        !           297: # Copy the remainder to the standard output. Output to the scratch file is
        !           298: # buffered, so a failed write may only be reported when it is closed.
        !           299: close(TEMP) || die "Can't finish writing /tmp/$$: $!\n";
        !           300: open(TEMP, "<", "/tmp/$$") || die "Can't open /tmp/$$ for input: $!\n";
        !           301: 
        !           302: print while (<TEMP>);
        !           303: 
        !           304: print <<End ;
        !           305: <p>
        !           306: Return to the <a href="index.html">PCRE index page</a>.
        !           307: </p>
        !           308: End
        !           309: 
        !           310: close(TEMP);
        !           311: unlink("/tmp/$$");
        !           312: 
        !           313: # End

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>