Annotation of embedaddon/pcre/CleanTxt, revision 1.1.1.1
#! /usr/bin/perl -w

# Script to take the output of nroff -man and remove all the backspacing and
# the page footers and the screen commands etc so that it is more usefully
# readable online. In fact, in the latest nroff, intermediate footers don't
# seem to be generated any more.

use strict;
use warnings;

my $blankcount  = 0;    # Blank lines seen but not yet output
my $lastwascut  = 0;    # True immediately after a page footer has been cut
my $firstheader = 1;    # True until the first header line has been printed
my $lastprinted;        # Line remembered for the indentation comparison
my @pushed_back;        # Raw lines read ahead that still need processing

# Return the next raw input line: a pushed-back line if there is one,
# otherwise the next line from STDIN. Yields undef at end of input.

sub next_raw_line {
    return shift @pushed_back if @pushed_back;
    return scalar(<STDIN>);
}

# Return a copy of a line with the terminal markup removed.

sub strip_markup {
    my ($text) = @_;
    $text =~ s/\x1b\[\d+m//g;    # Remove screen controls "ESC [ number m"
    $text =~ s/.\x8//g;          # Remove "char, backspace"
    return $text;
}

# Input on STDIN; output to STDOUT.

while (defined(my $raw = next_raw_line())) {
    my $line = strip_markup($raw);

    # Handle header lines. Retain only the first one we encounter, but remove
    # the blank line that follows. Any others (e.g. at end of document) and the
    # following blank line are dropped.

    if ($line =~ /^PCRE(\w*)\(([13])\)\s+PCRE\1\(\2\)$/) {
        if ($firstheader) {
            $firstheader = 0;
            print $line;
            $lastprinted = $line;
            $lastwascut  = 0;
        }

        # Remove a blank that follows. If the following line is not blank it
        # is pushed back (unmodified) so that it is processed like any other
        # line instead of being silently lost.

        my $follower = next_raw_line();
        unshift @pushed_back, $follower
            if defined $follower && strip_markup($follower) !~ /^\s*$/;
        next;
    }

    # Count runs of empty lines

    if ($line =~ /^\s*$/) {
        $blankcount++;
        $lastwascut = 0;
        next;
    }

    # If a chunk of lines has been cut out (page footer) and the next line
    # has a different indentation, put back one blank line.

    if ($lastwascut && $blankcount < 1 && defined $lastprinted) {
        my ($prev_indent) = $lastprinted =~ /^(\s*)/;
        my ($this_indent) = $line        =~ /^(\s*)/;
        $blankcount++ if $prev_indent ne $this_indent;
    }

    # We get here only when we have a non-blank line in hand. If it was preceded
    # by 3 or more blank lines, read the next 3 lines and see if they are blank.
    # If so, remove all 7 lines, and remember that we have just done a cut.

    if ($blankcount >= 3) {
        my @next;
        for my $i (0 .. 2) {
            my $ahead = next_raw_line();
            $ahead = "" if !defined $ahead;    # End of input counts as blank
            $next[$i] = strip_markup($ahead);
        }

        # Cut out chunks of the form <3 blanks><non-blank><3 blanks>

        if (   $next[0] =~ /^\s*$/
            && $next[1] =~ /^\s*$/
            && $next[2] =~ /^\s*$/)
        {
            $blankcount -= 3;
            $lastwascut = 1;
        }

        # Otherwise output the saved blanks, the current, and the next three
        # lines. The current line (not a lookahead line) is the one remembered
        # for later indentation comparisons.

        else {
            print "\n" x $blankcount;
            print $line;
            for my $ahead (@next) {

                # A second overstrike pass, as in the original script: it
                # removes a "char, backspace" pair that only becomes adjacent
                # after the first pass (e.g. after a doubled backspace).

                $ahead =~ s/.\x8//g;
                print $ahead;
            }
            $lastprinted = $line;
            $lastwascut  = 0;
            $blankcount  = 0;
        }
    }

    # This non-blank line is not preceded by 3 or more blank lines. Output
    # any blanks there are, and the line. Remember it. Force two blank lines
    # before headings.

    else {
        $blankcount = 2
            if $line =~ /^\S/
            && $line !~ /^Last updated/
            && $line !~ /^Copyright/
            && defined $lastprinted;
        print "\n" x $blankcount;
        print $line;
        $lastprinted = $line;
        $lastwascut  = 0;
        $blankcount  = 0;
    }
}

# End
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>