Annotation of embedaddon/pcre/CleanTxt, revision 1.1
1.1 ! misho 1: #! /usr/bin/perl -w
! 2:
! 3: # Script to take the output of nroff -man and remove all the backspacing and
! 4: # the page footers and the screen commands etc so that it is more usefully
! 5: # readable online. In fact, in the latest nroff, intermediate footers don't
! 6: # seem to be generated any more.
! 7:
! 8: $blankcount = 0; # Blank lines seen since the last output that have not been printed yet
! 9: $lastwascut = 0; # Set to 1 immediately after a <3 blanks><line><3 blanks> chunk is cut
! 10: $firstheader = 1; # Cleared once the first page header line has been printed
! 11:
! 12: # Input on STDIN; output to STDOUT.
! 13:
! 14: while (<STDIN>) # Filter loop: each input line is read into $_ and either printed or dropped
! 15: {
! 16: s/\x1b\[\d+m//g; # Remove screen controls "ESC [ number m"
! 17: s/.\x8//g; # Remove "char, backspace" pairs (overstrike sequences)
! 18:
! 19: # Handle header lines, i.e. "PCREname(n) ... PCREname(n)" with n being 1 or 3
! 20: # and the same name at both ends. Only the first header seen is printed; every
! 21: # header (first or later, e.g. at end of document) also drops the line after it.
! 22:
! 23: if (/^PCRE(\w*)\(([13])\)\s+PCRE\1\(\2\)$/)
! 24: {
! 25: if ($firstheader)
! 26: {
! 27: $firstheader = 0;
! 28: print;
! 29: $lastprinted = $_;
! 30: $lastwascut = 0;
! 31: }
! 32: $_=<STDIN>; # Read and discard the next line (expected to be blank; this is not checked)
! 33: next;
! 34: }
! 35:
! 36: # Count runs of empty (whitespace-only) lines; they are only output later, if at all
! 37:
! 38: if (/^\s*$/)
! 39: {
! 40: $blankcount++;
! 41: $lastwascut = 0;
! 42: next;
! 43: }
! 44:
! 45: # If a chunk of lines has just been cut out (page footer), no blank lines are
! 46: # pending, and this line's indentation differs from $lastprinted, put back one blank.
! 47:
! 48: if ($lastwascut && $blankcount < 1 && defined($lastprinted))
! 49: {
! 50: ($a) = $lastprinted =~ /^(\s*)/; # Leading whitespace of the remembered line
! 51: ($b) = $_ =~ /^(\s*)/; # Leading whitespace of the current line
! 52: $blankcount++ if ($a ne $b);
! 53: }
! 54:
! 55: # We get here only when we have a non-blank line in hand. If it was preceded
! 56: # by 3 or more blank lines, read the next 3 lines and see if they are blank.
! 57: # If so, remove all 7 lines (3 before, this one, 3 after), and remember the cut.
! 58:
! 59: if ($blankcount >= 3)
! 60: {
! 61: for ($i = 0; $i < 3; $i++)
! 62: {
! 63: $next[$i] = <STDIN>;
! 64: $next[$i] = "" if !defined $next[$i]; # At end of input, substitute "" (which counts as blank below)
! 65: $next[$i] =~ s/\x1b\[\d+m//g; # Remove screen controls "ESC [ number m"
! 66: $next[$i] =~ s/.\x8//g; # Remove "char, backspace"
! 67: }
! 68:
! 69: # Cut out chunks of the form <3 blanks><non-blank><3 blanks>
! 70:
! 71: if ($next[0] =~ /^\s*$/ &&
! 72: $next[1] =~ /^\s*$/ &&
! 73: $next[2] =~ /^\s*$/)
! 74: {
! 75: $blankcount -= 3; # Drop three pending blanks; the current and lookahead lines are never printed
! 76: $lastwascut = 1;
! 77: }
! 78:
! 79: # Otherwise output the saved blanks, the current, and the next three
! 80: # lines (the lookahead lines are printed as read, blank or not).
! 81:
! 82: else
! 83: {
! 84: for ($i = 0; $i < $blankcount; $i++) { print "\n"; }
! 85: print;
! 86: for ($i = 0; $i < 3; $i++)
! 87: {
! 88: $next[$i] =~ s/.\x8//g; # Second pass of the backspace removal already applied when the line was read
! 89: print $next[$i];
! 90: $lastprinted = $_; # NOTE(review): remembers the current line ($_), not $next[$i] -- confirm this is intended
! 91: }
! 92: $lastwascut = 0;
! 93: $blankcount = 0;
! 94: }
! 95: }
! 96:
! 97: # This non-blank line is not preceded by 3 or more blank lines. Output
! 98: # any blanks there are, and the line. Remember it. Force exactly two blank lines
! 99: # before headings (lines starting in column 1, except "Last updated"/"Copyright").
! 100:
! 101: else
! 102: {
! 103: $blankcount = 2 if /^\S/ && !/^Last updated/ && !/^Copyright/ &&
! 104: defined($lastprinted); # ...but not before anything has been printed at all
! 105: for ($i = 0; $i < $blankcount; $i++) { print "\n"; }
! 106: print;
! 107: $lastprinted = $_;
! 108: $lastwascut = 0;
! 109: $blankcount = 0;
! 110: }
! 111: }
! 112:
! 113: # End
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>