File:  [ELWIX - Embedded LightWeight unIX -] / embedaddon / pcre / 132html
Revision 1.1.1.2 (vendor branch): download - view: text, annotated - select for diffs - revision graph
Sun Jun 15 19:46:04 2014 UTC (10 years ago) by misho
Branches: pcre, MAIN
CVS tags: v8_34, HEAD
pcre 8.34

    1: #! /usr/bin/perl -w
    2: 
    3: # Script to turn PCRE man pages into HTML
    4: 
    5: 
    6: # Subroutine to handle font changes and other escapes
    7: 
    8: sub do_line {
    9: my($s) = $_[0];
   10: 
   11: $s =~ s/</&#60;/g;                   # Deal with < and >
   12: $s =~ s/>/&#62;/g;
   13: $s =~ s"\\fI(.*?)\\f[RP]"<i>$1</i>"g;
   14: $s =~ s"\\fB(.*?)\\f[RP]"<b>$1</b>"g;
   15: $s =~ s"\\e"\\"g;
   16: $s =~ s/(?<=Copyright )\(c\)/&copy;/g;
   17: $s;
   18: }
   19: 
   20: # Subroutine to ensure not in a paragraph
   21: 
   22: sub end_para {
   23: if ($inpara)
   24:   {
   25:   print TEMP "</PRE>\n" if ($inpre);
   26:   print TEMP "</P>\n";
   27:   }
   28: $inpara = $inpre = 0;
   29: $wrotetext = 0;
   30: }
   31: 
   32: # Subroutine to start a new paragraph
   33: 
   34: sub new_para {
   35: &end_para();
   36: print TEMP "<P>\n";
   37: $inpara = 1;
   38: }
   39: 
   40: 
   41: # Main program
   42: 
   43: $innf = 0;
   44: $inpara = 0;
   45: $inpre = 0;
   46: $wrotetext = 0;
   47: $toc = 0;
   48: $ref = 1;
   49: 
   50: while ($#ARGV >= 0 && $ARGV[0] =~ /^-/)
   51:   {
   52:   $toc = 1 if $ARGV[0] eq "-toc";
   53:   shift;
   54:   }
   55: 
   56: # Initial output to STDOUT
   57: 
   58: print <<End ;
   59: <html>
   60: <head>
   61: <title>$ARGV[0] specification</title>
   62: </head>
   63: <body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
   64: <h1>$ARGV[0] man page</h1>
   65: <p>
   66: Return to the <a href="index.html">PCRE index page</a>.
   67: </p>
   68: <p>
   69: This page is part of the PCRE HTML documentation. It was generated automatically
   70: from the original man page. If there is any nonsense in it, please consult the
   71: man page, in case the conversion went wrong.
   72: <br>
   73: End
   74: 
   75: print "<ul>\n" if ($toc);
   76: 
   77: open(TEMP, ">/tmp/$$") || die "Can't open /tmp/$$ for output\n";
   78: 
   79: while (<STDIN>)
   80:   {
   81:   # Handle lines beginning with a dot
   82: 
   83:   if (/^\./)
   84:     {
   85:     # Some of the PCRE man pages used to contain instances of .br. However,
   86:     # they should have all been removed because they cause trouble in some
   87:     # (other) automated systems that translate man pages to HTML. Complain if
   88:     # we find .br or .in (another macro that is deprecated).
   89: 
   90:     if (/^\.br/ || /^\.in/)
   91:       {
   92:       print STDERR "\n*** Deprecated macro encountered - rewrite needed\n";
   93:       print STDERR "*** $_\n";
   94:       die "*** Processing abandoned\n";
   95:       }
   96: 
   97:     # Instead of .br, relevent "literal" sections are enclosed in .nf/.fi.
   98: 
   99:     elsif (/^\.nf/)
  100:       {
  101:       $innf = 1;
  102:       }
  103: 
  104:     elsif (/^\.fi/)
  105:       {
  106:       $innf = 0;
  107:       }
  108: 
  109:     # Handling .sp is subtle. If it is inside a literal section, do nothing if
  110:     # the next line is a non literal text line; similarly, if not inside a
  111:     # literal section, do nothing if a literal follows, unless we are inside
  112:     # a .nf/.ne section. The point being that the <pre> and </pre> that delimit
  113:     # literal sections will do the spacing. Always skip if no previous output.
  114: 
  115:     elsif (/^\.sp/)
  116:       {
  117:       if ($wrotetext)
  118:         {
  119:         $_ = <STDIN>;
  120:         if ($inpre)
  121:           {
  122:           print TEMP "\n" if (/^[\s.]/);
  123:           }
  124:         else
  125:           {
  126:           print TEMP "<br>\n<br>\n" if ($innf || !/^[\s.]/);
  127:           }
  128:         redo;    # Now process the lookahead line we just read
  129:         }
  130:       }
  131:     elsif (/^\.TP/ || /^\.PP/ || /^\.P/)
  132:       {
  133:       &new_para();
  134:       }
  135:     elsif (/^\.SH\s*("?)(.*)\1/)
  136:       {
  137:       # Ignore the NAME section
  138:       if ($2 =~ /^NAME\b/)
  139:         {
  140:         <STDIN>;
  141:         next;
  142:         }
  143: 
  144:       &end_para();
  145:       my($title) = &do_line($2);
  146:       if ($toc)
  147:         {
  148:         printf("<li><a name=\"TOC%d\" href=\"#SEC%d\">$title</a>\n",
  149:           $ref, $ref);
  150:         printf TEMP ("<br><a name=\"SEC%d\" href=\"#TOC1\">$title</a><br>\n",
  151:           $ref, $ref);
  152:         $ref++;
  153:         }
  154:       else
  155:         {
  156:         print TEMP "<br><b>\n$title\n</b><br>\n";
  157:         }
  158:       }
  159:     elsif (/^\.SS\s*("?)(.*)\1/)
  160:       {
  161:       &end_para();
  162:       my($title) = &do_line($2);
  163:       print TEMP "<br><b>\n$title\n</b><br>\n";
  164:       }
  165:     elsif (/^\.B\s*(.*)/)
  166:       {
  167:       &new_para() if (!$inpara);
  168:       $_ = &do_line($1);
  169:       s/"(.*?)"/$1/g;
  170:       print TEMP "<b>$_</b>\n";
  171:       $wrotetext = 1;
  172:       }
  173:     elsif (/^\.I\s*(.*)/)
  174:       {
  175:       &new_para() if (!$inpara);
  176:       $_ = &do_line($1);
  177:       s/"(.*?)"/$1/g;
  178:       print TEMP "<i>$_</i>\n";
  179:       $wrotetext = 1;
  180:       }
  181: 
  182:     # A comment that starts "HREF" takes the next line as a name that
  183:     # is turned into a hyperlink, using the text given, which might be
  184:     # in a special font. If it ends in () or (digits) or punctuation, they
  185:     # aren't part of the link.
  186: 
  187:     elsif (/^\.\\"\s*HREF/)
  188:       {
  189:       $_=<STDIN>;
  190:       chomp;
  191:       $_ = &do_line($_);
  192:       $_ =~ s/\s+$//;
  193:       $_ =~ /^(?:<.>)?([^<(]+)(?:\(\))?(?:<\/.>)?(?:\(\d+\))?[.,;:]?$/;
  194:       print TEMP "<a href=\"$1.html\">$_</a>\n";
  195:       }
  196: 
  197:     # A comment that starts "HTML" inserts literal HTML
  198: 
  199:     elsif (/^\.\\"\s*HTML\s*(.*)/)
  200:       {
  201:       print TEMP $1;
  202:       }
  203: 
  204:     # A comment that starts < inserts that HTML at the end of the
  205:     # *next* input line - so as not to get a newline between them.
  206: 
  207:     elsif (/^\.\\"\s*(<.*>)/)
  208:       {
  209:       my($markup) = $1;
  210:       $_=<STDIN>;
  211:       chomp;
  212:       $_ = &do_line($_);
  213:       $_ =~ s/\s+$//;
  214:       print TEMP "$_$markup\n";
  215:       }
  216: 
  217:     # A comment that starts JOIN joins the next two lines together, with one
  218:     # space between them. Then that line is processed. This is used in some
  219:     # displays where two lines are needed for the "man" version. JOINSH works
  220:     # the same, except that it assumes this is a shell command, so removes
  221:     # continuation backslashes.
  222: 
  223:     elsif (/^\.\\"\s*JOIN(SH)?/)
  224:       {
  225:       my($one,$two);
  226:       $one = <STDIN>;
  227:       $two = <STDIN>;
  228:       $one =~ s/\s*\\e\s*$// if (defined($1));
  229:       chomp($one);
  230:       $two =~ s/^\s+//;
  231:       $_ = "$one $two";
  232:       redo;            # Process the joined lines
  233:       }
  234: 
  235:     # .EX/.EE are used in the pcredemo page to bracket the entire program,
  236:     # which is unmodified except for turning backslash into "\e".
  237: 
  238:     elsif (/^\.EX\s*$/)
  239:       {
  240:       print TEMP "<PRE>\n";
  241:       while (<STDIN>)
  242:         {
  243:         last if /^\.EE\s*$/;
  244:         s/\\e/\\/g;
  245:         s/&/&amp;/g;
  246:         s/</&lt;/g;
  247:         s/>/&gt;/g;
  248:         print TEMP;
  249:         }
  250:       }
  251: 
  252:     # Ignore anything not recognized
  253: 
  254:     next;
  255:     }
  256: 
  257:   # Line does not begin with a dot. Replace blank lines with new paragraphs
  258: 
  259:   if (/^\s*$/)
  260:     {
  261:     &end_para() if ($wrotetext);
  262:     next;
  263:     }
  264: 
  265:   # Convert fonts changes and output an ordinary line. Ensure that indented
  266:   # lines are marked as literal.
  267: 
  268:   $_ = &do_line($_);
  269:   &new_para() if (!$inpara);
  270: 
  271:   if (/^\s/)
  272:     {
  273:     if (!$inpre)
  274:       {
  275:       print TEMP "<pre>\n";
  276:       $inpre = 1;
  277:       }
  278:     }
  279:   elsif ($inpre)
  280:     {
  281:     print TEMP "</pre>\n";
  282:     $inpre = 0;
  283:     }
  284: 
  285:   # Add <br> to the end of a non-literal line if we are within .nf/.fi
  286: 
  287:   $_ .= "<br>\n" if (!$inpre && $innf);
  288: 
  289:   print TEMP;
  290:   $wrotetext = 1;
  291:   }
  292: 
  293: # The TOC, if present, will have been written - terminate it
  294: 
  295: print "</ul>\n" if ($toc);
  296: 
  297: # Copy the remainder to the standard output
  298: 
  299: close(TEMP);
  300: open(TEMP, "/tmp/$$") || die "Can't open /tmp/$$ for input\n";
  301: 
  302: print while (<TEMP>);
  303: 
  304: print <<End ;
  305: <p>
  306: Return to the <a href="index.html">PCRE index page</a>.
  307: </p>
  308: End
  309: 
  310: close(TEMP);
  311: unlink("/tmp/$$");
  312: 
  313: # End

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>