File:  [ELWIX - Embedded LightWeight unIX -] / embedaddon / pcre / CleanTxt
Revision 1.1.1.1 (vendor branch): download - view: text, annotated - select for diffs - revision graph
Mon Jul 22 08:25:56 2013 UTC (10 years, 10 months ago) by misho
Branches: pcre, MAIN
CVS tags: v8_34, v8_33, v8_31, v8_30, v8_21, HEAD
8.33

    1: #! /usr/bin/perl -w
    2: 
    3: # Script to take the output of nroff -man and remove all the backspacing and
    4: # the page footers and the screen commands etc so that it is more usefully
    5: # readable online. In fact, in the latest nroff, intermediate footers don't
    6: # seem to be generated any more.
    7: 
    8: $blankcount = 0;
    9: $lastwascut = 0;
   10: $firstheader = 1;
   11: 
   12: # Input on STDIN; output to STDOUT.
   13: 
   14: while (<STDIN>)
   15:   {
   16:   s/\x1b\[\d+m//g;   # Remove screen controls "ESC [ number m"
   17:   s/.\x8//g;         # Remove "char, backspace"
   18: 
   19:   # Handle header lines. Retain only the first one we encounter, but remove
   20:   # the blank line that follows. Any others (e.g. at end of document) and the
   21:   # following blank line are dropped.
   22: 
   23:   if (/^PCRE(\w*)\(([13])\)\s+PCRE\1\(\2\)$/)
   24:     {
   25:     if ($firstheader)
   26:       {
   27:       $firstheader = 0;
   28:       print;
   29:       $lastprinted = $_;
   30:       $lastwascut = 0;
   31:       }
   32:     $_=<STDIN>;       # Remove a blank that follows
   33:     next;
   34:     }
   35: 
   36:   # Count runs of empty lines
   37: 
   38:   if (/^\s*$/)
   39:     {
   40:     $blankcount++;
   41:     $lastwascut = 0;
   42:     next;
   43:     }
   44: 
   45:   # If a chunk of lines has been cut out (page footer) and the next line
   46:   # has a different indentation, put back one blank line.
   47: 
   48:   if ($lastwascut && $blankcount < 1 && defined($lastprinted))
   49:     {
   50:     ($a) = $lastprinted =~ /^(\s*)/;
   51:     ($b) = $_ =~ /^(\s*)/;
   52:     $blankcount++ if ($a ne $b);
   53:     }
   54: 
   55:   # We get here only when we have a non-blank line in hand. If it was preceded
   56:   # by 3 or more blank lines, read the next 3 lines and see if they are blank.
   57:   # If so, remove all 7 lines, and remember that we have just done a cut.
   58: 
   59:   if ($blankcount >= 3)
   60:     {
   61:     for ($i = 0; $i < 3; $i++)
   62:       {
   63:       $next[$i] = <STDIN>;
   64:       $next[$i] = "" if !defined $next[$i];
   65:       $next[$i] =~ s/\x1b\[\d+m//g;   # Remove screen controls "ESC [ number m"
   66:       $next[$i] =~ s/.\x8//g;         # Remove "char, backspace"
   67:       }
   68: 
   69:     # Cut out chunks of the form <3 blanks><non-blank><3 blanks>
   70: 
   71:     if ($next[0] =~ /^\s*$/ &&
   72:         $next[1] =~ /^\s*$/ &&
   73:         $next[2] =~ /^\s*$/)
   74:       {
   75:       $blankcount -= 3;
   76:       $lastwascut = 1;
   77:       }
   78: 
   79:     # Otherwise output the saved blanks, the current, and the next three
   80:     # lines. Remember the last printed line.
   81: 
   82:     else
   83:       {
   84:       for ($i = 0; $i < $blankcount; $i++) { print "\n"; }
   85:       print;
   86:       for ($i = 0; $i < 3; $i++)
   87:         {
   88:         $next[$i] =~ s/.\x8//g;
   89:         print $next[$i];
   90:         $lastprinted = $_;
   91:         }
   92:       $lastwascut = 0;
   93:       $blankcount = 0;
   94:       }
   95:     }
   96: 
   97:   # This non-blank line is not preceded by 3 or more blank lines. Output
   98:   # any blanks there are, and the line. Remember it. Force two blank lines
   99:   # before headings.
  100: 
  101:   else
  102:     {
  103:     $blankcount = 2 if /^\S/ && !/^Last updated/ && !/^Copyright/ &&
  104:       defined($lastprinted);
  105:     for ($i = 0; $i < $blankcount; $i++) { print "\n"; }
  106:     print;
  107:     $lastprinted = $_;
  108:     $lastwascut = 0;
  109:     $blankcount = 0;
  110:     }
  111:   }
  112: 
  113: # End

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>