Annotation of embedaddon/bird/doc/LinuxDocTools.pm, revision 1.1
1.1 ! misho 1: #! /usr/bin/perl
! 2: #
! 3: # LinuxDocTools.pm
! 4: #
! 5: # $Id$
! 6: #
! 7: # LinuxDoc-Tools driver core. This contains all the basic functionality
! 8: # we need to control all other components.
! 9: #
! 10: # © Copyright 1996, Cees de Groot.
! 11: # © Copyright 2000, Taketoshi Sano
! 12: #
! 13: # THIS VERSION HAS BEEN HACKED FOR BIRD BY MARTIN MARES
! 14: #
! 15: package LinuxDocTools;
! 16:
! 17: require 5.004;
! 18: use strict;
! 19:
! 20: =head1 NAME
! 21:
! 22: LinuxDocTools - SGML conversion utilities for LinuxDoc DTD.
! 23:
! 24: =head1 SYNOPSIS
! 25:
! 26: use LinuxDocTools;
! 27: LinuxDocTools::init;
! 28: @files = LinuxDocTools::process_options ($0, @ARGV);
! 29: for $curfile (@files) {
! 30: LinuxDocTools::process_file ($curfile);
! 31: }
! 32:
! 33: =head1 DESCRIPTION
! 34:
! 35: The LinuxDocTools package encapsulates all the functionality offered by
! 36: LinuxDoc-Tools. It is used, of course, by LinuxDoc-Tools;
! 37: but the encapsulation should provide for a simple interface for other users as well.
! 38:
! 39: =head1 FUNCTIONS
! 40:
! 41: =over 4
! 42:
! 43: =cut
! 44:
! 45: use DirHandle;
! 46: use File::Basename;
! 47: use File::Find;
! 48: use File::Copy;
! 49: use FileHandle;
! 50: use IPC::Open2;
! 51: use Cwd;
! 52: use LinuxDocTools::Lang;
! 53: use LinuxDocTools::Utils qw(process_options usage cleanup trap_signals remove_tmpfiles create_temp);
! 54: use LinuxDocTools::Vars;
! 55:
BEGIN
{
  #
  # Put the formatter directories on the module search path at compile
  # time. "use lib" prepends its arguments to @INC in the order given,
  # so listing site ahead of dist means site-local files are found
  # before the distributed ones.
  #
  use lib ("$main::DataDir/site", "$main::DataDir/dist");
}
! 66:
! 67: =item LinuxDocTools::init
! 68:
! 69: Takes care of initialization of package-global variables (which are actually
! 70: defined in L<LinuxDocTools::Vars>). The package-global variables are I<$global>,
! 71: a reference to a hash containing numerous settings, I<%Formats>, a hash
! 72: containing all the formats, and I<%FmtList>, a hash containing the currently
! 73: active formats for help texts.
! 74:
! 75: Apart from this, C<LinuxDocTools::init> also finds all distributed and site-local
! 76: formatting backends and C<require>s them.
! 77:
! 78: =cut
! 79:
sub init
{
  # Signal trapping is provided by LinuxDocTools::Utils.
  trap_signals ();

  #
  # The ``global'' pseudoformat carries the settings shared by every
  # backend. It doubles as the one place where state for other modules
  # is kept, so that the global variable name space stays clean.
  #
  $global = {
    NAME    => "global",
    HELP    => "",
    OPTIONS => [
      { option => "backend", type => "l", short => "B",
        'values' => [ "html", "info", "latex",
                      "lyx", "rtf", "txt", "check" ] },
      { option => "papersize", type => "l", short => "p",
        'values' => [ "a4", "letter" ] },
      { option => "language", type => "l", short => "l",
        'values' => [ @LinuxDocTools::Lang::Languages ] },
      { option => "charset", type => "l", short => "c",
        'values' => [ "latin", "ascii", "nippon", "euc-kr" ] },
      { option => "style", type => "s", short => "S" },
      { option => "tabsize", type => "i", short => "t" },
#     { option => "verbose", type => "f", short => "v" },
      { option => "debug", type => "f", short => "d" },
      { option => "define", type => "s", short => "D" },
      { option => "include", type => "s", short => "i" },
      { option => "pass", type => "s", short => "P" }
    ],

    # Default values for the options above.
    backend   => "linuxdoc",
    papersize => "a4",
    language  => "en",
    charset   => "ascii",
    style     => "",
    tabsize   => 8,
    verbose   => 0,
    define    => "",
    debug     => 0,
    include   => "",
    pass      => "",
    InFiles   => [],
  };

  # Register it both as a known format and as a format to show in help.
  $Formats{$global->{NAME}} = $global;
  $FmtList{$global->{NAME}} = $global;

  # automatic language detection: disabled by default
  # {
  #   my $lang;
  #   foreach $lang (@LinuxDocTools::Lang::Languages)
  #     {
  #       if (($ENV{"LC_ALL"} =~ /^$lang/i) ||
  #           ($ENV{"LC_CTYPE"} =~ /^$lang/i) ||
  #           ($ENV{"LANG"} =~ /^$lang/i)) {
  #         $global->{language} = Any2ISO($lang);
  #       }
  #     }
  # }

  #
  # Hook used when the format is "global" (from sgmlcheck): adds -s to
  # the nsgmls options and feeds the input file through cat. The pipe
  # command is also the hook's return value.
  #
  $global->{preNSGMLS} = sub {
    $global->{NsgmlsOpts} .= " -s ";
    return $global->{NsgmlsPrePipe} = "cat $global->{file}";
  };

  #
  # Collect the formatter files (fmt_*.pl). Both locations are scanned
  # in turn, dist first; a file name seen in both ends up tagged "site".
  # The chdir calls are deliberately left unchecked, as before.
  #
  my $startdir = cwd ();
  my %Locs;
  for my $where ("dist", "site")
    {
      chdir ("$main::DataDir/$where");
      my $listing = DirHandle->new (".");
      defined ($listing)
        or die "Unable to read directory $main::DataDir/$where: $!";
      for my $entry ($listing->read ())
        {
          $Locs{$entry} = $where if $entry =~ /^fmt_.*\.pl$/;
        }
      $listing->close ();
    }

  # Load every formatter that was found, then go back where we started.
  for my $formatter (keys %Locs)
    {
      require $formatter;
    }
  chdir ($startdir);
}
! 175:
! 176: =item LinuxDocTools::process_options ($0, @ARGV)
! 177:
! 178: This function contains all initialization that is bound to the current
! 179: invocation of LinuxDocTools. It looks in C<$0> to deduce the backend that
! 180: should be used (ld2txt activates the I<txt> backend) and parses the
! 181: options array. It returns an array of filenames it encountered during
! 182: option processing.
! 183:
! 184: As a side effect, the environment variables I<SGMLDECL> and
! 185: I<SGML_CATALOG_FILES> are modified.
! 186:
! 187: =cut
! 188:
#
# process_options ($progname, @args)
#
# Works out which backend to use, parses the command-line options and
# prepares the SGML environment.
#
#   $progname  name the program was invoked as; a leading "sgml"/"sgml2"
#              is stripped to obtain the format name
#   @args      the command-line arguments
#
# Returns the list of file names left over after option parsing.
# Calls usage() when no backend can be determined, when the backend is
# unknown, or when no file names were given.
# Side effects: sets $global->{myname}, {format} and {language},
# registers the chosen format in %FmtList, and modifies
# $ENV{SGML_CATALOG_FILES} and (when a declaration file exists)
# $ENV{SGMLDECL}.
#
sub process_options
{
  my ($progname, @args) = @_;

  #
  # Deduce the format from the caller's file name
  #
  my ($format) = fileparse ($progname, "");
  $global->{myname} = $format;
  $format =~ s/sgml2*(.*)/$1/;

  #
  # check the option "--backend / -B"
  #
  if ($format eq "linuxdoc")
    {
      my @pending = @args;
      while (@pending)
        {
          my $arg = shift @pending;
          if ($arg eq "-B")
            {
              # A trailing "-B" has no value to take; leave the format
              # alone so that the usage message below is shown instead
              # of carrying an undefined format name around.
              $format = shift @pending if @pending;
              last;
            }
          if ($arg =~ s/--backend=(.*)/$1/)
            {
              $format = $arg;
              last;
            }
        }
    }

  $format = "global" if $format eq "check";
  usage ("") if $format eq "linuxdoc";
  $format = "latex2e" if $format eq "latex";

  #
  # Reject unknown formats before touching %FmtList, so that the help
  # list does not gain an undefined entry for a name that is not a
  # format. (Presumably usage() does not return; if it does, the
  # assignment below leaves things as they were before this check was
  # introduced.)
  #
  $Formats{$format} or usage ("$global->{myname}: unknown format");
  $FmtList{$format} = $Formats{$format};
  $global->{format} = $format;

  #
  # Parse all the options.
  #
  my @files = LinuxDocTools::Utils::process_options (@args);
  $global->{language} = Any2ISO ($global->{language});

  #
  # check the number of given files
  #
  @files or usage ("no filenames given");

  #
  # Setup the SGML environment.
  # (Note that the Debian package rewrites the path to the catalog of
  # iso-entities using debian/rules so that it can use the entities
  # from the sgml-data package. debian/rules also removes the
  # iso-entities sub directory after doing make install.)
  #
  # The last catalog is an absolute path. It used to be written with a
  # stray "$main::" in front of "/etc", which does not refer to any
  # configured variable.
  #
  my @catalogs = (
    "$main::prefix/share/sgml/entities/sgml-iso-entities-8879.1986/catalog",
    "$main::DataDir/linuxdoc-tools.catalog",
    "/etc/sgml.catalog",
  );
  unshift (@catalogs, $ENV{SGML_CATALOG_FILES})
    if defined $ENV{SGML_CATALOG_FILES};
  $ENV{SGML_CATALOG_FILES} = join (":", @catalogs);

  #
  # Pick the SGML declaration: format-specific first, then the one
  # named after the style, then the generic one. SGMLDECL is left
  # untouched when none of them exists.
  #
  foreach my $decl ($format, $global->{style}, "sgml")
    {
      my $path = "$main::DataDir/dtd/$decl.dcl";
      if (-f $path)
        {
          $ENV{SGMLDECL} = $path;
          last;
        }
    }

  #
  # OK. Give the list of files we distilled from the options
  # back to the caller.
  #
  return @files;
}
! 267:
! 268: =item LinuxDocTools::process_file
! 269:
! 270: With all the configuration done, this routine will take a single filename
! 271: and convert it to the currently active backend format. The conversion is
! 272: done in a number of steps in tight interaction with the currently active
! 273: backend (see also L<LinuxDocTools::BackEnd>):
! 274:
! 275: =over
! 276:
! 277: =item 1. Backend: set NSGMLS options and optionally create a pre-NSGMLS pipe.
! 278:
! 279: =item 2. Here: Run the preprocessor to handle conditionals.
! 280:
! 281: =item 3. Here: Run NSGMLS.
! 282:
! 283: =item 4. Backend: run pre-ASP conversion.
! 284:
! 285: =item 5. Here: Run SGMLSASP.
! 286:
! 287: =item 6. Backend: run post-ASP conversion, generating the output.
! 288:
! 289: =back
! 290:
! 291: All stages are influenced by command-line settings, currently active format,
! 292: etcetera. See the code for details.
! 293:
! 294: =cut
! 295:
#
# Latin-1 bytes that process_file rewrites as SGML character entities
# when the "latin" charset is selected: byte => entity name.
# (By Farzad Farid, adapted by Greg Hankins.) The entity names are the
# ISO 8879 ones, restored here because the substitutions had lost their
# right-hand sides and no longer changed anything.
#
my %latin1_entity = (
  "\xB5" => "mu",
  "\xC0" => "Agrave", "\xC1" => "Aacute", "\xC2" => "Acirc",
  "\xC3" => "Atilde", "\xC4" => "Auml",   "\xC5" => "Aring",
  "\xC6" => "AElig",  "\xC7" => "Ccedil", "\xC8" => "Egrave",
  "\xC9" => "Eacute", "\xCA" => "Ecirc",  "\xCB" => "Euml",
  "\xCC" => "Igrave", "\xCD" => "Iacute", "\xCE" => "Icirc",
  "\xCF" => "Iuml",   "\xD1" => "Ntilde", "\xD2" => "Ograve",
  "\xD3" => "Oacute", "\xD4" => "Ocirc",  "\xD5" => "Otilde",
  "\xD6" => "Ouml",   "\xD8" => "Oslash", "\xD9" => "Ugrave",
  "\xDA" => "Uacute", "\xDB" => "Ucirc",  "\xDC" => "Uuml",
  "\xDD" => "Yacute", "\xDE" => "THORN",  "\xDF" => "szlig",
  "\xE0" => "agrave", "\xE1" => "aacute", "\xE2" => "acirc",
  "\xE3" => "atilde", "\xE4" => "auml",   "\xE5" => "aring",
  "\xE6" => "aelig",  "\xE7" => "ccedil", "\xE8" => "egrave",
  "\xE9" => "eacute", "\xEA" => "ecirc",  "\xEB" => "euml",
  "\xEC" => "igrave", "\xED" => "iacute", "\xEE" => "icirc",
  "\xEF" => "iuml",   "\xF0" => "eth",    "\xF1" => "ntilde",
  "\xF2" => "ograve", "\xF3" => "oacute", "\xF4" => "ocirc",
  "\xF5" => "otilde", "\xF6" => "ouml",   "\xF8" => "oslash",
  "\xF9" => "ugrave", "\xFA" => "uacute", "\xFB" => "ucirc",
  "\xFC" => "uuml",   "\xFD" => "yacute", "\xFE" => "thorn",
  "\xFF" => "yuml",
);

#
# Return a copy of $text in which every byte listed in %latin1_entity
# is replaced by "&name;". Bytes without an entry are left alone.
#
sub _latin1_to_entities
{
  my ($text) = @_;
  $text =~ s/([\xB5\xC0-\xFF])/
             exists $latin1_entity{$1} ? "&" . $latin1_entity{$1} . ";" : $1
            /gex;
  return $text;
}

#
# process_file ($file)
#
# Converts one SGML file with the currently selected format, going
# through the stages preprocessor/nsgmls, preASP, sgmlsasp and postASP.
# $file may be given with or without the ".sgml" suffix. Dies when the
# file cannot be found or when a stage fails to produce its output.
# Intermediate files live in a private directory below $TMPDIR (or
# /tmp) and are removed at the end unless debugging is enabled.
#
sub process_file
{
  my $file = shift (@_);
  my $saved_umask = umask;

  print "Processing file $file\n";
  umask 0077;

  #
  # Locate the input file: as given, with ".sgml" added, or with
  # ".SGML" added. The dot in the suffix patterns is escaped so that
  # only a real ".sgml" extension counts.
  #
  my ($filename, $filepath, $filesuffix) = fileparse ($file, '\.sgml');
  my $tmpnam = $filepath . '/' . $filename;
  $file = $tmpnam . $filesuffix;
  -f $file || $file =~ /\.sgml$/ || ($file .= '.sgml');
  -f $file || ($file = $tmpnam . '.SGML');
  -f $file || die "Cannot find $file\n";
  $global->{filename} = $filename;
  $global->{file} = $file;
  $global->{filepath} = $filepath;

  #
  # Have a first look at the document type declared in the source.
  #
  my $source = FileHandle->new ($file, "r")
    or die "Cannot open $file: $!\n";
  my $dtd;
  while (defined (my $line = <$source>))
    {
      $line =~ tr/A-Z/a-z/;
      # check for [<!doctype ... system] type definition
      if ($line =~ /<!doctype\s*(\w*)\s*system/)
        {
          $dtd = $1;
          last;
        }
      # check for <!doctype ... PUBLIC ... DTD ...
      if ($line =~ /<!doctype\s*\w*\s*public\s*.*\/\/dtd\s*(\w*)/mi)
        {
          $dtd = $1;
          last;
        }
      # check for <!doctype ...
      #          PUBLIC ... DTD ...
      # (multi-line version)
      if ($line =~ /<!doctype\s*(\w*)/)
        {
          $dtd = "precheck";
          next;
        }
      if (defined $dtd && $dtd eq "precheck"
          && $line =~ /\s*public\s*.*\/\/dtd\s*(\w*)/)
        {
          $dtd = $1;
          last;
        }
    }
  $source->close;
  if ( $global->{debug} )
    {
      print "DTD: " . (defined $dtd ? $dtd : "") . "\n";
    }
  $global->{dtd} = $dtd;

  # prepare temporary directory
  my $tmpdir = $ENV{'TMPDIR'} || '/tmp';
  $tmpdir = $tmpdir . '/' . 'linuxdoc-dir-' . $$;
  mkdir ($tmpdir, 0700) ||
    die " - temporary files can not be created, aborted - \n";

  my $tmpbase = $global->{tmpbase} = $tmpdir . '/sgmltmp.' . $filename;
  $ENV{"SGML_SEARCH_PATH"} .= ":$filepath";

  #
  # Set up the preprocessing command. Conditionals have to be
  # handled here until they can be moved into the DTD, otherwise
  # a validating SGML parser will choke on them.
  #
  # check if output option for latex is pdf or not
  if ($global->{format} eq "latex2e")
    {
      my $output = $Formats{$global->{format}}{output};
      if (defined $output && $output eq "pdf")
        {
          $global->{define} .= " pdflatex=yes";
        }
    }
  my ($precmd) = "|sgmlpre output=$global->{format} $global->{define}";

  #
  # You can hack $NsgmlsOpts here, etcetera.
  # Every addition starts with a blank: without it "-i<include>" ran
  # straight into the preceding -D path, and the options of a second
  # file ran into those of the first.
  #
  $global->{NsgmlsOpts} .= " -D $main::prefix/share/sgml -D $main::DataDir";
  $global->{NsgmlsOpts} .= " -i$global->{include}" if ($global->{include});
  $global->{NsgmlsPrePipe} = "NOTHING";
  if ( defined $Formats{$global->{format}}{preNSGMLS} )
    {
      # The hook's return value is the command to read the input from.
      $global->{NsgmlsPrePipe} = $Formats{$global->{format}}{preNSGMLS}->();
    }

  #
  # Run the preprocessor and nsgmls.
  #
  my ($ifile, $writensgmls);

  if ($global->{NsgmlsPrePipe} eq "NOTHING")
    {
      $ifile = FileHandle->new ($file, "r");
    }
  else
    {
      $ifile = FileHandle->new ("$global->{NsgmlsPrePipe}|");
    }
  defined $ifile or die "Cannot read input for $file: $!\n";

  create_temp("$tmpbase.1");
  $writensgmls = FileHandle->new (
    "$precmd|$main::progs->{NSGMLS} $global->{NsgmlsOpts} $ENV{SGMLDECL} >\"$tmpbase.1\"");
  defined $writensgmls
    or die "Cannot start $main::progs->{NSGMLS}: $!\n";

  # With the "latin" charset, Latin-1 characters become SGML entities.
  my $want_entities = ($global->{charset} eq "latin");
  while (defined (my $line = <$ifile>))
    {
      $line = _latin1_to_entities ($line) if $want_entities;
      print $writensgmls $line;
    }
  $ifile->close;
  $writensgmls->close;

  #
  # Special case: if format is global, we're just checking.
  #
  $global->{format} eq "global" && cleanup ();

  #
  # If the output file is empty, something went wrong.
  #
  ! -e "$tmpbase.1" and die "can't create file - exiting";
  -z "$tmpbase.1" and die "SGML parsing error - exiting";
  if ( $global->{debug} )
    {
      print "Nsgmls stage finished.\n";
    }

  #
  # If a preASP stage is defined, let the format handle it.
  #
  # preASP ($inhandle, $outhandle);
  #
  my $inpreasp = FileHandle->new ("$tmpbase.1", "r")
    or die "Cannot open $tmpbase.1: $!\n";
  my $outpreasp = FileHandle->new ("$tmpbase.2", O_WRONLY|O_CREAT|O_EXCL, 0600);
  defined $outpreasp or die "can't create file - exiting";
  if (defined $Formats{$global->{format}}{preASP})
    {
      $Formats{$global->{format}}{preASP}->($inpreasp, $outpreasp) == 0 or
        die "error pre-processing $global->{format}.\n";
    }
  else
    {
      copy ($inpreasp, $outpreasp);
    }
  $inpreasp->close;
  $outpreasp->close;
  ! -e "$tmpbase.2" and die "can't create file - exiting";

  if ( $global->{debug} )
    {
      print "PreASP stage finished.\n";
    }

  #
  # Run sgmlsasp, with an optional style if specified.
  #
  # Search order:
  # - datadir/site/<dtd>/<format>
  # - datadir/dist/<dtd>/<format>
  # So we need to fetch the doctype from the intermediate.
  #
  # Note: this is a very simplistic check - but as far as I know,
  # it is correct. Am I right?
  #
  my $parsed = FileHandle->new ("$tmpbase.2", "r")
    or die "Cannot open $tmpbase.2: $!\n";
  # The first line starting with "(" names the document element.
  $dtd = undef;
  while (defined (my $line = <$parsed>))
    {
      if ($line =~ /^\(/)
        {
          $dtd = $line;
          last;
        }
    }
  $parsed->close;
  $dtd = "" unless defined $dtd;
  $dtd =~ s/^\(//;
  $dtd =~ tr/A-Z/a-z/;
  chomp $dtd;
  $global->{dtd} = $dtd;

  my $style = "";
  if ($global->{style})
    {
      $style = "$main::DataDir/site/$dtd/$global->{format}/$global->{style}mapping";
      -r $style or
        $style = "$main::DataDir/dist/$dtd/$global->{format}/$global->{style}mapping";
    }
  my $mapping = "$main::DataDir/site/$dtd/$global->{format}/mapping";
  -r $mapping or $mapping = "$main::DataDir/dist/$dtd/$global->{format}/mapping";

  $global->{charset} = "nippon" if ($global->{language} eq "ja");
  #
  # we don't have Korean groff so charset should be latin1.
  #
  if ($global->{language} eq "ko")
    {
      if ($global->{format} eq "groff")
        {
          $global->{charset} = "latin1";
        }
      else
        {
          $global->{charset} = "euc-kr";
        }
    }

  if ($global->{format} eq "groff" or $global->{format} eq "latex2e")
    {
      if ($dtd eq "linuxdoctr")
        {
          $mapping = "$main::DataDir/dist/$dtd/$global->{format}/tr-mapping";
        }
    }

  create_temp("$tmpbase.3");
  system ("$main::progs->{SGMLSASP} $style $mapping <\"$tmpbase.2\" | "
          . "expand -$global->{tabsize} >\"$tmpbase.3\"");
  ! -e "$tmpbase.3" and die "can't create file - exiting";

  if ( $global->{debug} )
    {
      print "ASP stage finished.\n";
    }

  #
  # If a postASP stage is defined, let the format handle it.
  # It should leave whatever it thinks is right based on $file.
  #
  # postASP ($inhandle)
  #
  # The caller's umask is back in force from here on, so the final
  # output is not created with the restrictive temporary-file mask.
  #
  umask $saved_umask;
  my $inpostasp = FileHandle->new ("$tmpbase.3", "r")
    or die "Cannot open $tmpbase.3: $!\n";
  if (defined $Formats{$global->{format}}{postASP})
    {
      $Formats{$global->{format}}{postASP}->($inpostasp) == 0 or
        die "error post-processing $global->{format}.\n";
    }
  $inpostasp->close;

  if ( $global->{debug} )
    {
      print "postASP stage finished.\n";
    }

  #
  # All done, remove the temporaries.
  #
  if( !$global->{debug} ) {
    remove_tmpfiles($tmpbase);
  }
}
! 619:
! 620: =pod
! 621:
! 622: =back
! 623:
! 624: =head1 SEE ALSO
! 625:
! 626: Documentation for various sub-packages of LinuxDocTools.
! 627:
! 628: =head1 AUTHOR
! 629: SGMLTools are written by Cees de Groot, C<E<lt>cg@cdegroot.comE<gt>>,
! 630: and various SGML-Tools contributors as listed in C<CONTRIBUTORS>.
! 631: Taketoshi Sano C<E<lt>sano@debian.orgE<gt>> renamed it to LinuxDocTools.
! 632:
! 633: =cut
! 634: 1;
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>