Annotation of embedaddon/bird2/doc/LinuxDocTools.pm, revision 1.1.1.1
1.1 misho 1: #! /usr/bin/perl
2: #
3: # LinuxDocTools.pm
4: #
5: # $Id$
6: #
7: # LinuxDoc-Tools driver core. This contains all the basic functionality
8: # we need to control all other components.
9: #
10: # © Copyright 1996, Cees de Groot.
11: # © Copyright 2000, Taketoshi Sano
12: #
13: # THIS VERSION HAS BEEN HACKED FOR BIRD BY MARTIN MARES
14: #
15: package LinuxDocTools;
16:
17: require 5.004;
18: use strict;
19:
20: =head1 NAME
21:
22: LinuxDocTools - SGML conversion utilities for LinuxDoc DTD.
23:
24: =head1 SYNOPSIS
25:
26: use LinuxDocTools;
27: LinuxDocTools::init;
28: @files = LinuxDocTools::process_options ($0, @ARGV);
29: for $curfile (@files) {
30: LinuxDocTools::process_file ($curfile);
31: }
32:
33: =head1 DESCRIPTION
34:
35: The LinuxDocTools package encapsulates all the functionality offered by
36: LinuxDoc-Tools. It is used, of course, by LinuxDoc-Tools;
37: but the encapsulation should provide for a simple interface for other users as well.
38:
39: =head1 FUNCTIONS
40:
41: =over 4
42:
43: =cut
44:
45: use DirHandle;
46: use File::Basename;
47: use File::Find;
48: use File::Copy;
49: use FileHandle;
50: use IPC::Open2;
51: use Cwd;
52: use LinuxDocTools::Lang;
53: use LinuxDocTools::Utils qw(process_options usage cleanup trap_signals remove_tmpfiles create_temp);
54: use LinuxDocTools::Vars;
55:
sub BEGIN
{
    #
    # Put both backend directories at the front of the module search path.
    # "use lib" prepends its arguments in the order given, so naming site
    # ahead of dist means site-local files are found before the distributed
    # ones.  ("use" runs at compile time regardless of the enclosing block.)
    #
    use lib ("$main::DataDir/site", "$main::DataDir/dist");
}
66:
67: =item LinuxDocTools::init
68:
69: Takes care of initialization of package-global variables (which are actually
70: defined in L<LinuxDocTools::Vars>). The package-global variables are I<$global>,
71: a reference to a hash containing numerous settings, I<%Formats>, a hash
72: containing all the formats, and I<%FmtList>, a hash containing the currently
73: active formats for help texts.
74:
75: Apart from this, C<LinuxDocTools::init> also finds all distributed and site-local
76: formatting backends and C<require>s them.
77:
78: =cut
79:
sub init
{
    trap_signals();

    #
    # The ``global'' pseudoformat.  It carries the settings shared by every
    # backend and doubles as the one place where state needed by other
    # modules is kept, so the package name space stays clean.
    #
    $global = {
        NAME    => "global",
        HELP    => "",
        OPTIONS => [
            { option => "backend", type => "l",
              'values' => [ "html", "info", "latex",
                            "lyx", "rtf", "txt", "check" ],
              short => "B" },
            { option => "papersize", type => "l",
              'values' => [ "a4", "letter" ], short => "p" },
            { option => "language", type => "l",
              'values' => [ @LinuxDocTools::Lang::Languages ], short => "l" },
            { option => "charset", type => "l",
              'values' => [ "latin", "ascii", "nippon", "euc-kr" ], short => "c" },
            { option => "style", type => "s", short => "S" },
            { option => "tabsize", type => "i", short => "t" },
            # { option => "verbose", type => "f", short => "v" },
            { option => "debug", type => "f", short => "d" },
            { option => "define", type => "s", short => "D" },
            { option => "include", type => "s", short => "i" },
            { option => "pass", type => "s", short => "P" }
        ],
        backend   => "linuxdoc",
        papersize => "a4",
        language  => "en",
        charset   => "ascii",
        style     => "",
        tabsize   => 8,
        verbose   => 0,
        define    => "",
        debug     => 0,
        include   => "",
        pass      => "",
        InFiles   => [],
    };
    $Formats{$global->{NAME}} = $global;    # All formats we know.
    $FmtList{$global->{NAME}} = $global;    # List of formats for help msgs.

    # automatic language detection: disabled by default
    # {
    #   my $lang;
    #   foreach $lang (@LinuxDocTools::Lang::Languages)
    #   {
    #     if (($ENV{"LC_ALL"} =~ /^$lang/i) ||
    #         ($ENV{"LC_CTYPE"} =~ /^$lang/i) ||
    #         ($ENV{"LANG"} =~ /^$lang/i)) {
    #       $global->{language} = Any2ISO($lang);
    #     }
    #   }
    # }

    #
    # Hook used when the active format is "global" (from sgmlcheck): add
    # -s to the nsgmls options and feed the input through cat.  The value of
    # the last assignment is also what the hook returns.
    #
    $global->{preNSGMLS} = sub {
        $global->{NsgmlsOpts} .= " -s ";
        $global->{NsgmlsPrePipe} = "cat $global->{file}";
    };

    #
    # Collect the fmt_*.pl backends from dist and then from site, and load
    # each distinct file name once.  The chdir results are not checked, so
    # if a chdir fails "." is simply whatever directory we were in before.
    #
    my $origdir = cwd();
    my %Locs;
    foreach my $where ("dist", "site")
    {
        chdir "$main::DataDir/$where";
        my $listing = DirHandle->new(".");
        die "Unable to read directory $main::DataDir/$where: $!"
            unless defined($listing);
        foreach my $backend (grep { /^fmt_.*\.pl$/ } $listing->read())
        {
            $Locs{$backend} = $where;
        }
        $listing->close();
    }
    foreach my $backend (keys %Locs)
    {
        require $backend;
    }
    chdir $origdir;
}
175:
176: =item LinuxDocTools::process_options ($0, @ARGV)
177:
178: This function contains all initialization that is bound to the current
179: invocation of LinuxDocTools. It looks in C<$0> to deduce the backend that
180: should be used (ld2txt activates the I<txt> backend) and parses the
181: options array. It returns an array of filenames it encountered during
182: option processing.
183:
184: As a side effect, the environment variables I<SGMLDECL> and
185: I<SGML_CATALOG_FILES> are modified.
186:
187: =cut
188:
#
# process_options ($progname, @args)
#
# Works out which backend format to use (from the program name, or from
# -B / --backend= when invoked as "linuxdoc"), lets
# LinuxDocTools::Utils::process_options parse @args, and sets up the
# SGML_CATALOG_FILES and SGMLDECL environment variables.
#
# Returns the list of file names found among the arguments.  Calls usage()
# when the format is unknown, missing, or no file names were given.
#
sub process_options
{
    my ($progname, @args) = @_;

    #
    # Deduce the format from the caller's file name
    #
    my ($format) = fileparse ($progname, "");
    $global->{myname} = $format;
    $format =~ s/sgml2*(.*)/$1/;

    #
    # check the option "--backend / -B"
    #
    if ($format eq "linuxdoc")
    {
        my @pending = @args;
        while (@pending)
        {
            my $arg = shift @pending;
            if ($arg eq "-B")
            {
                # A trailing -B has no value; keep "linuxdoc" so that the
                # usage message is shown instead of working with undef.
                $format = shift @pending if @pending;
                last;
            }
            # Anchored so that only a real --backend=... argument matches.
            if ($arg =~ /^--backend=(.*)/)
            {
                $format = $1;
                last;
            }
        }
    }

    $format = "global" if $format eq "check";
    usage ("") if $format eq "linuxdoc";
    $format = "latex2e" if $format eq "latex";
    # Validate before registering, so %FmtList is not given an undef entry
    # for an unknown format while usage() runs.
    defined $Formats{$format} or
        usage ("$global->{myname}: unknown format");
    $FmtList{$format} = $Formats{$format};
    $global->{format} = $format;

    #
    # Parse all the options.
    #
    my @files = LinuxDocTools::Utils::process_options (@args);
    $global->{language} = Any2ISO ($global->{language});

    #
    # check the number of given files
    #
    @files or usage ("no filenames given");

    #
    # Setup the SGML environment.
    # (Note that the Debian package rewrites the path to the catalog of
    #  iso-entities using debian/rules so that it can use
    #  entities from the sgml-data package.  debian/rules also
    #  removes the iso-entities sub directory after doing make install.)
    #
    # The last entry used to be written "$main::/etc/sgml.catalog"; that
    # appears to interpolate an unrelated, normally empty scalar and so
    # only incidentally produced this same path.  It is now spelled out.
    #
    my @catalogs = (
        "$main::prefix/share/sgml/sgml-iso-entities-8879.1986/catalog",
        "$main::prefix/share/sgml/entities/sgml-iso-entities-8879.1986/catalog",
        "$main::DataDir/linuxdoc-tools.catalog",
        "/etc/sgml.catalog",
    );
    my @existing = defined $ENV{SGML_CATALOG_FILES}
                 ? ($ENV{SGML_CATALOG_FILES})
                 : ();
    $ENV{SGML_CATALOG_FILES} = join (":", @existing, @catalogs);

    #
    # Pick the SGML declaration: format-specific first, then the one named
    # after the style, then the generic one.  SGMLDECL is left untouched
    # when none of them exists.
    #
    foreach my $decl ("$format.dcl", "$global->{style}.dcl", "sgml.dcl")
    {
        my $candidate = "$main::DataDir/dtd/$decl";
        if (-f $candidate)
        {
            $ENV{SGMLDECL} = $candidate;
            last;
        }
    }

    #
    # OK. Give the list of files we distilled from the options
    # back to the caller.
    #
    return @files;
}
268:
269: =item LinuxDocTools::process_file
270:
271: With all the configuration done, this routine will take a single filename
272: and convert it to the currently active backend format. The conversion is
273: done in a number of steps in tight interaction with the currently active
274: backend (see also L<LinuxDocTools::BackEnd>):
275:
276: =over
277:
278: =item 1. Backend: set NSGMLS options and optionally create a pre-NSGMLS pipe.
279:
280: =item 2. Here: Run the preprocessor to handle conditionals.
281:
282: =item 3. Here: Run NSGMLS.
283:
284: =item 4. Backend: run pre-ASP conversion.
285:
286: =item 5. Here: Run SGMLSASP.
287:
288: =item 6. Backend: run post-ASP conversion, generating the output.
289:
290: =back
291:
292: All stages are influenced by command-line settings, currently active format,
293: etcetera. See the code for details.
294:
295: =cut
296:
#
# process_file ($file)
#
# Converts one SGML source file with the currently active backend format
# ($global->{format}).  $file may be given with or without its ".sgml" /
# ".SGML" suffix.  Stages: doctype sniffing, sgmlpre + nsgmls, the
# backend's optional preASP hook, sgmlsasp + expand, and the backend's
# optional postASP hook.  Intermediate files live in a private directory
# under $TMPDIR (or /tmp) and are removed at the end unless debugging.
#
# Dies when the input cannot be found or opened, when a temporary file
# cannot be created, when nsgmls produces no output, or when a backend
# hook reports a non-zero status.
#
sub process_file
{
    my $file = shift (@_);
    my $saved_umask = umask;

    print "Processing file $file\n";
    umask 0077;

    # Single quotes: fileparse() treats the suffix as a regex, so the dot
    # has to stay escaped to match only a literal ".sgml".
    my ($filename, $filepath, $filesuffix) = fileparse ($file, '\.sgml');
    my $tmpnam = $filepath . '/' . $filename;
    $file = $tmpnam . $filesuffix;
    $file .= '.sgml' unless -f $file or $file =~ /\.sgml$/;
    $file = $tmpnam . '.SGML' unless -f $file;
    -f $file or die "Cannot find $file\n";
    $global->{filename} = $filename;
    $global->{file} = $file;
    $global->{filepath} = $filepath;

    #
    # Sniff the DTD name from the <!doctype ...> declaration.
    #
    my $probe = FileHandle->new ($file, "r")
        or die "Cannot open $file: $!\n";
    my $dtd;
    while (defined (my $line = <$probe>))
    {
        $line =~ tr/A-Z/a-z/;
        # check for [<!doctype ... system] type definition
        if ($line =~ /<!doctype\s*(\w*)\s*system/)
        {
            $dtd = $1;
            last;
        }
        # check for <!doctype ... PUBLIC ... DTD ...
        if ($line =~ m{<!doctype\s*\w*\s*public\s*.*//dtd\s*(\w*)})
        {
            $dtd = $1;
            last;
        }
        # check for <!doctype ...
        #           PUBLIC ... DTD ...
        # (multi-line version): remember that the doctype was seen and
        # pick the DTD name up from a following line.
        if ($line =~ /<!doctype\s*(\w*)/)
        {
            $dtd = "precheck";
            next;
        }
        if (defined $dtd && $dtd eq "precheck"
            && $line =~ m{\s*public\s*.*//dtd\s*(\w*)})
        {
            $dtd = $1;
            last;
        }
    }
    $probe->close;
    if ( $global->{debug} )
    {
        print "DTD: " . (defined $dtd ? $dtd : "") . "\n";
    }
    $global->{dtd} = $dtd;

    # prepare temporary directory
    my $tmpdir = $ENV{'TMPDIR'} || '/tmp';
    $tmpdir = $tmpdir . '/' . 'linuxdoc-dir-' . $$;
    mkdir ($tmpdir, 0700) ||
        die " - temporary files can not be created, aborted - \n";

    my $tmpbase = $global->{tmpbase} = $tmpdir . '/sgmltmp.' . $filename;
    $ENV{"SGML_SEARCH_PATH"} .= ":$filepath";

    #
    # Set up the preprocessing command. Conditionals have to be
    # handled here until they can be moved into the DTD, otherwise
    # a validating SGML parser will choke on them.
    #
    # check if output option for latex is pdf or not
    if ($global->{format} eq "latex2e"
        && $Formats{$global->{format}}{output} eq "pdf")
    {
        $global->{define} .= " pdflatex=yes";
    }

    local $ENV{PATH} = "$ENV{PATH}:/usr/lib/linuxdoc-tools";
    my $precmd = "|sgmlpre output=$global->{format} $global->{define}";

    #
    # You can hack $NsgmlsOpts here, etcetera.
    # Every fragment starts with a space; without it "-i<include>" ended
    # up glued onto the preceding directory name.
    #
    $global->{NsgmlsOpts} .= " -D $main::prefix/share/sgml -D $main::DataDir";
    $global->{NsgmlsOpts} .= " -i$global->{include}" if ($global->{include});
    $global->{NsgmlsPrePipe} = "NOTHING";
    my $pre_nsgmls = $Formats{$global->{format}}{preNSGMLS};
    if (defined $pre_nsgmls)
    {
        $global->{NsgmlsPrePipe} = $pre_nsgmls->(@_);
    }

    #
    # Run the preprocessor and nsgmls.
    #
    my $ifile;
    if ($global->{NsgmlsPrePipe} eq "NOTHING")
    {
        $ifile = FileHandle->new ($file, "r");
    }
    else
    {
        $ifile = FileHandle->new ("$global->{NsgmlsPrePipe}|");
    }
    defined $ifile or die "Cannot read $file: $!\n";

    create_temp ("$tmpbase.1");
    my $writensgmls = FileHandle->new (
        "$precmd|$main::progs->{NSGMLS} $global->{NsgmlsOpts} $ENV{SGMLDECL} >\"$tmpbase.1\"");
    defined $writensgmls or die "Cannot start the SGML parser: $!\n";

    # Outline these commands later on - CdG
    my $latin_input = ($global->{charset} eq "latin");
    while (defined (my $line = <$ifile>))
    {
        $line = _latin1_to_sgml ($line) if $latin_input;
        print $writensgmls $line;
    }
    $ifile->close;
    $writensgmls->close;

    #
    # Special case: if format is global, we're just checking.
    #
    cleanup () if $global->{format} eq "global";

    #
    # If the output file is empty, something went wrong.
    #
    -e "$tmpbase.1" or die "can't create file - exiting";
    -z "$tmpbase.1" and die "SGML parsing error - exiting";
    if ( $global->{debug} )
    {
        print "Nsgmls stage finished.\n";
    }

    #
    # If a preASP stage is defined, let the format handle it.
    #
    # preASP ($inhandle, $outhandle);
    #
    my $inpreasp = FileHandle->new ("$tmpbase.1", "r")
        or die "Cannot open $tmpbase.1: $!\n";
    my $outpreasp = FileHandle->new ("$tmpbase.2", O_WRONLY|O_CREAT|O_EXCL, 0600)
        or die "Cannot create $tmpbase.2: $!\n";
    my $pre_asp = $Formats{$global->{format}}{preASP};
    if (defined $pre_asp)
    {
        $pre_asp->($inpreasp, $outpreasp) == 0 or
            die "error pre-processing $global->{format}.\n";
    }
    else
    {
        copy ($inpreasp, $outpreasp)
            or die "Cannot copy $tmpbase.1 to $tmpbase.2: $!\n";
    }
    $inpreasp->close;
    $outpreasp->close;
    -e "$tmpbase.2" or die "can't create file - exiting";

    if ( $global->{debug} )
    {
        print "PreASP stage finished.\n";
    }

    #
    # Run sgmlsasp, with an optional style if specified.
    #
    # Search order:
    # - datadir/site/<dtd>/<format>
    # - datadir/dist/<dtd>/<format>
    # So we need to fetch the doctype from the intermediate.
    #
    # Note: this is a very simplistic check - but as far as I know,
    # it is correct. Am I right?
    #
    my $esis = FileHandle->new ("$tmpbase.2", "r")
        or die "Cannot open $tmpbase.2: $!\n";
    undef $dtd;
    while (defined (my $line = <$esis>))
    {
        # The first line starting with "(" is taken to name the document type.
        if ($line =~ /^\(/)
        {
            $dtd = $line;
            last;
        }
    }
    $esis->close;
    defined $dtd or die "Cannot determine the document type of $file\n";
    $dtd =~ s/^\(//;
    $dtd =~ tr/A-Z/a-z/;
    chomp $dtd;
    $global->{dtd} = $dtd;

    my $style = "";
    if ($global->{style})
    {
        $style = "$main::DataDir/site/$dtd/$global->{format}/$global->{style}mapping";
        -r $style or
            $style = "$main::DataDir/dist/$dtd/$global->{format}/$global->{style}mapping";
    }
    my $mapping = "$main::DataDir/site/$dtd/$global->{format}/mapping";
    -r $mapping or $mapping = "$main::DataDir/dist/$dtd/$global->{format}/mapping";

    $global->{charset} = "nippon" if ($global->{language} eq "ja");
    #
    # we don't have Korean groff so charset should be latin1.
    #
    if ($global->{language} eq "ko")
    {
        $global->{charset} =
            ($global->{format} eq "groff") ? "latin1" : "euc-kr";
    }

    if (($global->{format} eq "groff" or $global->{format} eq "latex2e")
        and $dtd eq "linuxdoctr")
    {
        $mapping = "$main::DataDir/dist/$dtd/$global->{format}/tr-mapping";
    }

    create_temp ("$tmpbase.3");
    system ("$main::progs->{SGMLSASP} $style $mapping <\"$tmpbase.2\" | "
          . "expand -$global->{tabsize} >\"$tmpbase.3\"");
    -e "$tmpbase.3" or die "can't create file - exiting";

    if ( $global->{debug} )
    {
        print "ASP stage finished.\n";
    }

    #
    # If a postASP stage is defined, let the format handle it.
    # It should leave whatever it thinks is right based on $file.
    #
    # postASP ($inhandle)
    #
    umask $saved_umask;
    my $inpostasp = FileHandle->new ("$tmpbase.3", "r")
        or die "Cannot open $tmpbase.3: $!\n";
    my $post_asp = $Formats{$global->{format}}{postASP};
    if (defined $post_asp)
    {
        $post_asp->($inpostasp) == 0 or
            die "error post-processing $global->{format}.\n";
    }
    $inpostasp->close;

    if ( $global->{debug} )
    {
        print "postASP stage finished.\n";
    }

    #
    # All done, remove the temporaries.
    #
    remove_tmpfiles ($tmpbase) unless $global->{debug};
}

#
# Latin-1 byte => SGML entity name, used for "latin" charset input.
# (change latin1 characters to SGML; by Farzad Farid, adapted by Greg
# Hankins.)  Keys are written as byte escapes so the table does not depend
# on the encoding this source file happens to be stored in.
#
my %latin1_entity = (
    "\xB5" => "mu",
    "\xC0" => "Agrave", "\xC1" => "Aacute", "\xC2" => "Acirc",
    "\xC3" => "Atilde", "\xC4" => "Auml",   "\xC5" => "Aring",
    "\xC6" => "AElig",  "\xC7" => "Ccedil",
    "\xC8" => "Egrave", "\xC9" => "Eacute", "\xCA" => "Ecirc",
    "\xCB" => "Euml",
    "\xCC" => "Igrave", "\xCD" => "Iacute", "\xCE" => "Icirc",
    "\xCF" => "Iuml",
    "\xD1" => "Ntilde",
    "\xD2" => "Ograve", "\xD3" => "Oacute", "\xD4" => "Ocirc",
    "\xD5" => "Otilde", "\xD6" => "Ouml",   "\xD8" => "Oslash",
    "\xD9" => "Ugrave", "\xDA" => "Uacute", "\xDB" => "Ucirc",
    "\xDC" => "Uuml",
    "\xDD" => "Yacute", "\xDE" => "THORN",  "\xDF" => "szlig",
    "\xE0" => "agrave", "\xE1" => "aacute", "\xE2" => "acirc",
    "\xE3" => "atilde", "\xE4" => "auml",   "\xE5" => "aring",
    "\xE6" => "aelig",  "\xE7" => "ccedil",
    "\xE8" => "egrave", "\xE9" => "eacute", "\xEA" => "ecirc",
    "\xEB" => "euml",
    "\xEC" => "igrave", "\xED" => "iacute", "\xEE" => "icirc",
    "\xEF" => "iuml",
    "\xF0" => "eth",    "\xF1" => "ntilde",
    "\xF2" => "ograve", "\xF3" => "oacute", "\xF4" => "ocirc",
    "\xF5" => "otilde", "\xF6" => "ouml",   "\xF8" => "oslash",
    "\xF9" => "ugrave", "\xFA" => "uacute", "\xFB" => "ucirc",
    "\xFC" => "uuml",
    "\xFD" => "yacute", "\xFE" => "thorn",  "\xFF" => "yuml",
);

#
# _latin1_to_sgml ($text)
#
# Returns a copy of $text in which every byte listed in %latin1_entity is
# replaced by the corresponding "&name;" entity reference.  Bytes that are
# not in the table are left alone.
#
sub _latin1_to_sgml
{
    my ($text) = @_;
    $text =~ s{([\xB5\xC0-\xFF])}
              {exists $latin1_entity{$1} ? '&' . $latin1_entity{$1} . ';' : $1}ge;
    return $text;
}
622:
623: =pod
624:
625: =back
626:
627: =head1 SEE ALSO
628:
629: Documentation for various sub-packages of LinuxDocTools.
630:
631: =head1 AUTHOR
632: SGMLTools are written by Cees de Groot, C<E<lt>cg@cdegroot.comE<gt>>,
633: and various SGML-Tools contributors as listed in C<CONTRIBUTORS>.
634: Taketoshi Sano C<E<lt>sano@debian.orgE<gt>> renamed it to LinuxDocTools.
635:
636: =cut
637: 1;
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>