Annotation of embedaddon/pcre/CleanTxt, revision 1.1

#! /usr/bin/perl -w

# Script to take the output of nroff -man and remove all the backspacing and
# the page footers and the screen commands etc so that it is more usefully
# readable online. In fact, in the latest nroff, intermediate footers don't
# seem to be generated any more.
#
# Input on STDIN; output to STDOUT.

use strict;
use warnings;

# Return a copy of one line of nroff output with the terminal rendering
# artefacts removed: the "ESC [ number m" screen controls and the
# "char, backspace" pairs used for overstriking.

sub strip_controls
  {
  my ($text) = @_;
  $text =~ s/\x1b\[\d+m//g;   # Remove screen controls "ESC [ number m"
  $text =~ s/.\x8//g;         # Remove "char, backspace"
  return $text;
  }

# Filter the nroff output read from the handle $in and write the cleaned text
# to the handle $out. Nothing is returned.

sub clean_text
  {
  my ($in, $out) = @_;

  my $blankcount = 0;    # Blank lines seen but not yet output
  my $lastwascut = 0;    # True when a page footer has just been cut out
  my $firstheader = 1;   # True until the first header line has been output
  my $lastprinted;       # Most recently output line that contains text

  while (defined(my $line = <$in>))
    {
    $line = strip_controls($line);

    # Handle header lines. Retain only the first one we encounter, but remove
    # the line that follows it (expected to be blank; it is dropped without
    # being inspected). Any others (e.g. at end of document) and their
    # following line are dropped.

    if ($line =~ /^PCRE(\w*)\(([13])\)\s+PCRE\1\(\2\)$/)
      {
      if ($firstheader)
        {
        $firstheader = 0;
        print {$out} $line;
        $lastprinted = $line;
        $lastwascut = 0;
        }
      my $following = <$in>;   # Remove a blank that follows
      next;
      }

    # Count runs of empty lines

    if ($line =~ /^\s*$/)
      {
      $blankcount++;
      $lastwascut = 0;
      next;
      }

    # If a chunk of lines has been cut out (page footer) and the next line
    # has a different indentation, put back one blank line.

    if ($lastwascut && $blankcount < 1 && defined($lastprinted))
      {
      my ($previndent) = $lastprinted =~ /^(\s*)/;
      my ($thisindent) = $line =~ /^(\s*)/;
      $blankcount++ if $previndent ne $thisindent;
      }

    # We get here only when we have a non-blank line in hand. If it was
    # preceded by 3 or more blank lines, read the next 3 lines and see if they
    # are blank. If so, remove all 7 lines, and remember that we have just
    # done a cut.

    if ($blankcount >= 3)
      {
      my @next;
      for my $i (0 .. 2)
        {
        my $ahead = <$in>;

        # At end of input there is nothing left to read; an empty string
        # counts as a blank line in the test below.

        $next[$i] = strip_controls(defined $ahead ? $ahead : "");
        }

      # Cut out chunks of the form <3 blanks><non-blank><3 blanks>

      if ($next[0] =~ /^\s*$/ &&
          $next[1] =~ /^\s*$/ &&
          $next[2] =~ /^\s*$/)
        {
        $blankcount -= 3;
        $lastwascut = 1;
        }

      # Otherwise output the saved blanks, the current, and the next three
      # lines. Remember the last printed line that contains text: it is what
      # the indentation comparison after a later cut is made against, so it
      # must be the final look-ahead line with content and not the current
      # line.

      else
        {
        print {$out} "\n" x $blankcount;
        print {$out} $line;
        $lastprinted = $line;
        for my $ahead (@next)
          {
          print {$out} $ahead;
          $lastprinted = $ahead if $ahead =~ /\S/;
          }
        $lastwascut = 0;
        $blankcount = 0;
        }
      }

    # This non-blank line is not preceded by 3 or more blank lines. Output
    # any blanks there are, and the line. Remember it. Force two blank lines
    # before headings.

    else
      {
      $blankcount = 2 if $line =~ /^\S/ && $line !~ /^Last updated/ &&
        $line !~ /^Copyright/ && defined($lastprinted);
      print {$out} "\n" x $blankcount;
      print {$out} $line;
      $lastprinted = $line;
      $lastwascut = 0;
      $blankcount = 0;
      }
    }

  return;
  }

# Run as a filter when executed directly; do nothing when the file is loaded
# by another program (so that clean_text can be called on its own).

clean_text(\*STDIN, \*STDOUT) unless caller;

# End

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>