Annotation of embedaddon/pcre/CleanTxt, revision 1.1.1.1

#! /usr/bin/perl -w

# Script to take the output of nroff -man and remove all the backspacing and
# the page footers and the screen commands etc so that it is more usefully
# readable online. In fact, in the latest nroff, intermediate footers don't
# seem to be generated any more.
#
# Input on STDIN; output to STDOUT.

use strict;
use warnings;

# Return a copy of the given line with terminal decoration removed: first the
# screen controls "ESC [ number m", then every "char, backspace" pair (which
# is how nroff produces bold and underlined text by overstriking).

sub strip_controls {
    my ($line) = @_;
    $line =~ s/\x1b\[\d+m//g;
    $line =~ s/.\x8//g;
    return $line;
}

# Filter nroff output read from the handle $in and write the cleaned text to
# the handle $out.
#
#   * Only the first "PCREname(n) ... PCREname(n)" header line is kept; every
#     header line, kept or not, also swallows the single line that follows it.
#   * Chunks of the form <3 blanks><non-blank><3 blanks> (page footers) are
#     removed.
#   * Two blank lines are forced before any line that starts in column one,
#     except for "Last updated" and "Copyright" lines and except before
#     anything has been printed.
#   * Blank lines that are still pending at end of input are not written.
#
# Lines read as look-ahead while testing for a footer are written verbatim
# (after control stripping) when no footer is found; the header and blank-run
# handling is not applied to them.

sub clean_text {
    my ($in, $out) = @_;

    my $blankcount  = 0;    # blank lines seen but not yet written
    my $lastwascut  = 0;    # true only directly after a footer was cut
    my $firstheader = 1;    # true until the first header has been written
    my $lastprinted;        # last non-blank line written; undef until then

    while (my $line = <$in>) {
        $line = strip_controls($line);

        # Handle header lines. Retain only the first one we encounter, but
        # remove the line that follows it. Any others (e.g. at the end of the
        # document) and the line following them are dropped.

        if ($line =~ /^PCRE(\w*)\(([13])\)\s+PCRE\1\(\2\)$/) {
            if ($firstheader) {
                $firstheader = 0;
                print {$out} $line;
                $lastprinted = $line;
                $lastwascut  = 0;
            }
            my $following = <$in>;    # Discard the (blank) line that follows
            next;
        }

        # Count runs of empty lines; they are written later, if at all.

        if ($line =~ /^\s*$/) {
            $blankcount++;
            $lastwascut = 0;
            next;
        }

        # If a chunk of lines has just been cut out (page footer), no blank
        # lines remain pending, and this line is indented differently from
        # the last line written, put back one blank line.

        if ($lastwascut && $blankcount < 1 && defined $lastprinted) {
            my ($previous_indent) = $lastprinted =~ /^(\s*)/;
            my ($current_indent)  = $line        =~ /^(\s*)/;
            $blankcount++ if $previous_indent ne $current_indent;
        }

        # We get here only with a non-blank line in hand. If it was preceded
        # by 3 or more blank lines, read the next 3 lines and see if they are
        # blank too. If so, drop this line together with three blanks on each
        # side, and remember that we have just done a cut.

        if ($blankcount >= 3) {
            my @ahead;
            for (1 .. 3) {
                my $peeked = <$in>;
                $peeked = "" if !defined $peeked;    # ran off the end of input
                push @ahead, strip_controls($peeked);
            }

            if (!grep { /\S/ } @ahead) {
                $blankcount -= 3;
                $lastwascut = 1;
            }

            # Otherwise output the saved blanks, the current line, and the
            # three look-ahead lines. Remember the last non-blank line that
            # was really written, which may be one of the look-ahead lines,
            # so that a later indentation comparison uses the right line.

            else {
                print {$out} "\n" x $blankcount;
                print {$out} $line;
                $lastprinted = $line;
                for my $peeked (@ahead) {
                    print {$out} $peeked;
                    $lastprinted = $peeked if $peeked =~ /\S/;
                }
                $lastwascut = 0;
                $blankcount = 0;
            }
        }

        # This non-blank line is not preceded by 3 or more blank lines. Output
        # any blanks there are, and the line. Remember it. Force two blank
        # lines before headings.

        else {
            $blankcount = 2
                if $line =~ /^\S/
                && $line !~ /^Last updated/
                && $line !~ /^Copyright/
                && defined $lastprinted;
            print {$out} "\n" x $blankcount;
            print {$out} $line;
            $lastprinted = $line;
            $lastwascut  = 0;
            $blankcount  = 0;
        }
    }

    return;
}

# Run as a filter when executed as a script; do nothing when loaded by
# another Perl file (for example a test).

clean_text(\*STDIN, \*STDOUT) unless caller;

# End

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>