embedaddon/pcre/132html - view

File: [ELWIX - Embedded LightWeight unIX -] / embedaddon / pcre / 132html
Revision 1.1.1.2 (vendor branch): download - view: text, annotated - select for diffs - revision graph
Sun Jun 15 19:46:04 2014 UTC (10 years ago) by misho
Branches: pcre, MAIN
CVS tags: v8_34, HEAD

pcre 8.34

#! /usr/bin/perl -w

# Script to turn PCRE man pages into HTML
#
# The man page source is read from STDIN and the HTML is written to STDOUT.
# Leading command-line arguments that start with "-" are treated as options;
# the only one acted on is -toc, which emits a table of contents built from
# the .SH headings. The first remaining argument is used as the page name in
# the HTML title and top-level heading.

use strict;
use warnings;

# Conversion state shared by the subroutines below

my $innf      = 0;    # Inside a .nf/.fi section
my $inpara    = 0;    # A <P> paragraph is currently open
my $inpre     = 0;    # A <pre> literal section is currently open
my $wrotetext = 0;    # Text has been output since the last end_para()
my $toc       = 0;    # -toc was given
my $ref       = 1;    # Number to use for the next TOC entry

# The page body is accumulated in memory and written out after the TOC. This
# avoids a temporary file with a predictable name in /tmp, and leaves nothing
# behind if processing is abandoned part-way through.

my $body = '';

# Subroutine to append text to the page body

sub emit {
    $body .= join('', @_);
    return;
}

# Subroutine to handle font changes and other escapes
#
# Argument: one line (or fragment) of man page text
# Returns:  the text with < and > escaped, \fI...\fR/\fP and \fB...\fR/\fP
#           turned into <i> and <b> elements, \e turned into a backslash,
#           and "(c)" after "Copyright " turned into the copyright entity

sub do_line {
    my ($s) = @_;

    $s =~ s/</&lt;/g;    # Deal with < and > before any markup is added
    $s =~ s/>/&gt;/g;
    $s =~ s{\\fI(.*?)\\f[RP]}{<i>$1</i>}g;
    $s =~ s{\\fB(.*?)\\f[RP]}{<b>$1</b>}g;
    $s =~ s{\\e}{\\}g;
    $s =~ s/(?<=Copyright )\(c\)/&copy;/g;
    return $s;
}

# Subroutine to ensure not in a paragraph

sub end_para {
    if ($inpara) {
        emit("</PRE>\n") if $inpre;
        emit("</P>\n");
    }
    $inpara    = $inpre = 0;
    $wrotetext = 0;
    return;
}

# Subroutine to start a new paragraph

sub new_para {
    end_para();
    emit("<P>\n");
    $inpara = 1;
    return;
}

# Main program

sub main {
    while (@ARGV && $ARGV[0] =~ /^-/) {
        $toc = 1 if $ARGV[0] eq '-toc';
        shift @ARGV;
    }

    my $page = @ARGV ? $ARGV[0] : '';

    # Initial output to STDOUT

    print <<"End";
<html>
<head>
<title>$page specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>$page man page</h1>
<p>
Return to the <a href="index.html">PCRE index page</a>.
</p>
<p>
This page is part of the PCRE HTML documentation. It was generated automatically
from the original man page. If there is any nonsense in it, please consult the
man page, in case the conversion went wrong.
<br>
End

    print "<ul>\n" if $toc;

    LINE:
    while (<STDIN>) {

        # Handle lines beginning with a dot

        if (/^\./) {

            # Some of the PCRE man pages used to contain instances of .br.
            # However, they should have all been removed because they cause
            # trouble in some (other) automated systems that translate man
            # pages to HTML. Complain if we find .br or .in (another macro
            # that is deprecated).

            if (/^\.br/ || /^\.in/) {
                print STDERR "\n*** Deprecated macro encountered - rewrite needed\n";
                print STDERR "*** $_\n";
                die "*** Processing abandoned\n";
            }

            # Instead of .br, relevant "literal" sections are enclosed in
            # .nf/.fi.

            elsif (/^\.nf/) {
                $innf = 1;
            }
            elsif (/^\.fi/) {
                $innf = 0;
            }

            # Handling .sp is subtle. If it is inside a literal section, do
            # nothing if the next line is a non literal text line; similarly,
            # if not inside a literal section, do nothing if a literal
            # follows, unless we are inside a .nf/.fi section. The point being
            # that the <pre> and </pre> that delimit literal sections will do
            # the spacing. Always skip if no previous output.

            elsif (/^\.sp/) {
                if ($wrotetext) {
                    $_ = <STDIN>;
                    $_ = '' unless defined $_;    # .sp was the last line
                    if ($inpre) {
                        emit("\n") if /^[\s.]/;
                    }
                    else {
                        emit("<br>\n<br>\n") if $innf || !/^[\s.]/;
                    }
                    redo LINE;    # Now process the lookahead line we just read
                }
            }
            elsif (/^\.T?P/) {

                # .TP, .PP and .P (or anything else starting .P)
                new_para();
            }
            elsif (/^\.SH\s*("?)(.*)\1/) {
                my $heading = $2;

                # Ignore the NAME section, including the line that follows
                if ($heading =~ /^NAME\b/) {
                    my $discarded = <STDIN>;
                    next LINE;
                }

                end_para();
                my $title = do_line($heading);
                if ($toc) {

                    # The title is passed as an argument, not interpolated
                    # into the format, so that a % in a heading is harmless.
                    printf("<li><a name=\"TOC%d\" href=\"#SEC%d\">%s</a>\n",
                        $ref, $ref, $title);
                    emit(sprintf("<br><a name=\"SEC%d\" href=\"#TOC1\">%s</a><br>\n",
                        $ref, $title));
                    $ref++;
                }
                else {
                    emit("<br><b>\n$title\n</b><br>\n");
                }
            }
            elsif (/^\.SS\s*("?)(.*)\1/) {
                end_para();
                my $title = do_line($2);
                emit("<br><b>\n$title\n</b><br>\n");
            }
            elsif (/^\.B\s*(.*)/) {
                new_para() if !$inpara;
                $_ = do_line($1);
                s/"(.*?)"/$1/g;    # Drop the quotes round quoted arguments
                emit("<b>$_</b>\n");
                $wrotetext = 1;
            }
            elsif (/^\.I\s*(.*)/) {
                new_para() if !$inpara;
                $_ = do_line($1);
                s/"(.*?)"/$1/g;    # Drop the quotes round quoted arguments
                emit("<i>$_</i>\n");
                $wrotetext = 1;
            }

            # A comment that starts "HREF" takes the next line as a name that
            # is turned into a hyperlink, using the text given, which might be
            # in a special font. If it ends in () or (digits) or punctuation,
            # they aren't part of the link.

            elsif (/^\.\\"\s*HREF/) {
                $_ = <STDIN>;
                $_ = '' unless defined $_;
                chomp;
                $_ = do_line($_);
                s/\s+$//;
                my ($target) =
                    /^(?:<.>)?([^<(]+)(?:\(\))?(?:<\/.>)?(?:\(\d+\))?[.,;:]?$/;
                $target = '' unless defined $target;
                emit("<a href=\"$target.html\">$_</a>\n");
            }

            # A comment that starts "HTML" inserts literal HTML

            elsif (/^\.\\"\s*HTML\s*(.*)/) {
                emit($1);
            }

            # A comment that starts < inserts that HTML at the end of the
            # *next* input line - so as not to get a newline between them.

            elsif (/^\.\\"\s*(<.*>)/) {
                my $markup = $1;
                $_ = <STDIN>;
                $_ = '' unless defined $_;
                chomp;
                $_ = do_line($_);
                s/\s+$//;
                emit("$_$markup\n");
            }

            # A comment that starts JOIN joins the next two lines together,
            # with one space between them. Then that line is processed. This
            # is used in some displays where two lines are needed for the
            # "man" version. JOINSH works the same, except that it assumes
            # this is a shell command, so removes continuation backslashes.

            elsif (/^\.\\"\s*JOIN(SH)?/) {
                my $is_shell = defined $1;
                my $one      = <STDIN>;
                my $two      = <STDIN>;
                $one = '' unless defined $one;
                $two = '' unless defined $two;
                $one =~ s/\s*\\e\s*$// if $is_shell;
                chomp($one);
                $two =~ s/^\s+//;
                $_ = "$one $two";
                redo LINE;    # Process the joined lines
            }

            # .EX/.EE are used in the pcredemo page to bracket the entire
            # program, which is unmodified except for turning backslash into
            # "\e".

            elsif (/^\.EX\s*$/) {
                emit("<PRE>\n");
                while (<STDIN>) {
                    last if /^\.EE\s*$/;
                    s/\\e/\\/g;
                    s/&/&amp;/g;
                    s/</&lt;/g;
                    s/>/&gt;/g;
                    emit($_);
                }

                # Close the element opened above; $inpre is not set for this
                # section, so end_para() would not do it.
                emit("</PRE>\n");
            }

            # Ignore anything not recognized

            next LINE;
        }

        # Line does not begin with a dot. Replace blank lines with new
        # paragraphs

        if (/^\s*$/) {
            end_para() if $wrotetext;
            next LINE;
        }

        # Convert fonts changes and output an ordinary line. Ensure that
        # indented lines are marked as literal.

        $_ = do_line($_);
        new_para() if !$inpara;

        if (/^\s/) {
            if (!$inpre) {
                emit("<pre>\n");
                $inpre = 1;
            }
        }
        elsif ($inpre) {
            emit("</pre>\n");
            $inpre = 0;
        }

        # Add <br> to the end of a non-literal line if we are within .nf/.fi

        $_ .= "<br>\n" if !$inpre && $innf;

        emit($_);
        $wrotetext = 1;
    }

    # The TOC, if present, will have been written - terminate it

    print "</ul>\n" if $toc;

    # Copy the accumulated body to the standard output

    print $body;

    print <<"End";
<p>
Return to the <a href="index.html">PCRE index page</a>.
</p>
End

    return;
}

# Run the conversion only when executed as a script, so that the subroutines
# can be loaded on their own.

main() unless caller;

# End