1.1     ! misho       1: <!doctype refentry PUBLIC "-//OASIS//DTD DocBook V4.1//EN" [
        !             2: 
        !             3: <!-- Process this file with docbook-to-man to generate an nroff manual
        !             4:      page: `docbook-to-man manpage.sgml > manpage.1'.  You may view
        !             5:      the manual page with: `docbook-to-man manpage.sgml | nroff -man |
        !             6:      less'.  A typical entry in a Makefile or Makefile.am is:
        !             7: 
        !             8: manpage.1: manpage.sgml
        !             9:        docbook-to-man $< > $@
        !            10:   -->
        !            11: 
        !            12:   <!-- Fill in your name for FIRSTNAME and SURNAME. -->
        !            13:   <!ENTITY dhfirstname "<firstname>Scott</firstname>">
        !            14:   <!ENTITY dhsurname   "<surname>Bronson</surname>">
        !            15:   <!-- Please adjust the date whenever revising the manpage. -->
        !            16:   <!ENTITY dhdate      "<date>December  5, 2001</date>">
        !            17:   <!-- SECTION should be 1-8, maybe w/ subsection other parameters are
        !            18:        allowed: see man(7), man(1). -->
        !            19:   <!ENTITY dhsection   "<manvolnum>1</manvolnum>">
        !            20:   <!ENTITY dhemail     "<email></email>">
        !            21:   <!ENTITY dhusername  "Scott Bronson">
        !            22:   <!ENTITY dhucpackage "<refentrytitle>XMLWF</refentrytitle>">
        !            23:   <!ENTITY dhpackage   "xmlwf">
        !            24: 
        !            25:   <!ENTITY debian      "<productname>Debian GNU/Linux</productname>">
        !            26:   <!ENTITY gnu         "<acronym>GNU</acronym>">
        !            27: ]>
        !            28: 
        !            29: <refentry>
        !            30:   <refentryinfo>
        !            31:     <address>
        !            32:       &dhemail;
        !            33:     </address>
        !            34:     <author>
        !            35:       &dhfirstname;
        !            36:       &dhsurname;
        !            37:     </author>
        !            38:     <copyright>
        !            39:       <year>2001</year>
        !            40:       <holder>&dhusername;</holder>
        !            41:     </copyright>
        !            42:     &dhdate;
        !            43:   </refentryinfo>
        !            44:   <refmeta>
        !            45:     &dhucpackage;
        !            46: 
        !            47:     &dhsection;
        !            48:   </refmeta>
        !            49:   <refnamediv>
        !            50:     <refname>&dhpackage;</refname>
        !            51: 
        !            52:     <refpurpose>Determines if an XML document is well-formed</refpurpose>
        !            53:   </refnamediv>
        !            54:   <refsynopsisdiv>
        !            55:     <cmdsynopsis>
        !            56:       <command>&dhpackage;</command>
        !            57:          <arg><option>-s</option></arg>
        !            58:          <arg><option>-n</option></arg>
        !            59:          <arg><option>-p</option></arg>
        !            60:          <arg><option>-x</option></arg>
        !            61: 
        !            62:          <arg><option>-e <replaceable>encoding</replaceable></option></arg>
        !            63:          <arg><option>-w</option></arg>
        !            64: 
        !            65:          <arg><option>-d <replaceable>output-dir</replaceable></option></arg>
        !            66:          <arg><option>-c</option></arg>
        !            67:          <arg><option>-m</option></arg>
        !            68: 
        !            69:          <arg><option>-r</option></arg>
        !            70:          <arg><option>-t</option></arg>
        !            71: 
        !            72:          <arg><option>-v</option></arg>
        !            73: 
        !            74:          <arg>file ...</arg>
        !            75:     </cmdsynopsis>
        !            76:   </refsynopsisdiv>
        !            77:  
        !            78:   <refsect1>
        !            79:     <title>DESCRIPTION</title>
        !            80: 
        !            81:     <para>
        !            82:        <command>&dhpackage;</command> uses the Expat library to
        !            83:        determine if an XML document is well-formed.  It is
        !            84:        non-validating.
        !            85:        </para>
        !            86: 
        !            87:        <para>
        !            88:        If you do not specify any files on the command-line, and you
        !            89:        have a recent version of <command>&dhpackage;</command>, the
        !            90:        input file will be read from standard input.
        !            91:        </para>
        !            92: 
        !            93:   </refsect1>
        !            94: 
        !            95:   <refsect1>
        !            96:     <title>WELL-FORMED DOCUMENTS</title>
        !            97: 
        !            98:        <para>
        !            99:          A well-formed document must adhere to the
        !           100:          following rules:
        !           101:        </para>
        !           102: 
        !           103:        <itemizedlist>
        !           104:       <listitem><para>
        !           105:            The file begins with an XML declaration.  For instance,
        !           106:                <literal>&lt;?xml version="1.0" standalone="yes"?&gt;</literal>.
        !           107:                <emphasis>NOTE:</emphasis>
        !           108:                <command>&dhpackage;</command> does not currently
        !           109:                check for a valid XML declaration.
        !           110:       </para></listitem>
        !           111:       <listitem><para>
        !           112:                Every start tag is either empty (&lt;tag/&gt;)
        !           113:                or has a corresponding end tag.
        !           114:       </para></listitem>
        !           115:       <listitem><para>
        !           116:            There is exactly one root element.  This element must contain
        !           117:                all other elements in the document.  Only comments, white
        !           118:                space, and processing instructions may come after the close
        !           119:                of the root element.
        !           120:       </para></listitem>
        !           121:       <listitem><para>
        !           122:                All elements nest properly.
        !           123:       </para></listitem>
        !           124:       <listitem><para>
        !           125:                All attribute values are enclosed in quotes (either single
        !           126:                or double).
        !           127:       </para></listitem>
        !           128:     </itemizedlist>
        !           129: 
        !           130:        <para>
        !           131:          If the document has a DTD, and it strictly complies with that
        !           132:          DTD, then the document is also considered <emphasis>valid</emphasis>.
        !           133:          <command>&dhpackage;</command> is a non-validating parser --
        !           134:          it does not check the DTD.  However, it does support
        !           135:          external entities (see the <option>-x</option> option).
        !           136:        </para>
        !           137:   </refsect1>
        !           138: 
        !           139:   <refsect1>
        !           140:     <title>OPTIONS</title>
        !           141: 
        !           142: <para>
        !           143: When an option includes an argument, you may specify the argument either
        !           144: separately ("<option>-d</option> output") or concatenated with the
        !           145: option ("<option>-d</option>output").  <command>&dhpackage;</command>
        !           146: supports both.
        !           147: </para>
        !           148: 
        !           149:     <variablelist>
        !           150: 
        !           151:       <varlistentry>
        !           152:         <term><option>-c</option></term>
        !           153:         <listitem>
        !           154:                <para>
        !           155:   If the input file is well-formed and <command>&dhpackage;</command>
        !           156:   doesn't encounter any errors, the input file is simply copied to
        !           157:   the output directory unchanged.
        !           158:   This implies no namespaces (turns off <option>-n</option>) and
        !           159:   requires <option>-d</option> to specify an output directory.
        !           160:                </para>
        !           161:         </listitem>
        !           162:       </varlistentry>
        !           163: 
        !           164:       <varlistentry>
        !           165:         <term><option>-d output-dir</option></term>
        !           166:         <listitem>
        !           167:                <para>
        !           168:   Specifies a directory to contain transformed
        !           169:   representations of the input files.
        !           170:   By default, <option>-d</option> outputs a canonical representation
        !           171:   (described below).
        !           172:   You can select different output formats using <option>-c</option>
        !           173:   and <option>-m</option>.
        !           174:          </para>
        !           175:          <para>
        !           176:   The output filenames will
        !           177:   be exactly the same as the input filenames or "STDIN" if the input is
        !           178:   coming from standard input.  Therefore, you must be careful that the
        !           179:   output file does not go into the same directory as the input
        !           180:   file.  Otherwise, <command>&dhpackage;</command> will delete the
        !           181:   input file before it generates the output file (just like running
        !           182:   <literal>cat &lt; file &gt; file</literal> in most shells).
        !           183:          </para>
        !           184:          <para> 
        !           185:   Two structurally equivalent XML documents have a byte-for-byte
        !           186:   identical canonical XML representation.
        !           187:   Note that ignorable white space is considered significant and
        !           188:   is treated equivalently to data.
        !           189:   More on canonical XML can be found at
        !           190:   http://www.jclark.com/xml/canonxml.html .
        !           191:          </para>
        !           192:         </listitem>
        !           193:       </varlistentry>
        !           194: 
        !           195:       <varlistentry>
        !           196:         <term><option>-e encoding</option></term>
        !           197:         <listitem>
        !           198:                <para>
        !           199:    Specifies the character encoding for the document, overriding
        !           200:    any document encoding declaration.  <command>&dhpackage;</command>
        !           201:    supports four built-in encodings:
        !           202:        <literal>US-ASCII</literal>,
        !           203:        <literal>UTF-8</literal>,
        !           204:        <literal>UTF-16</literal>, and
        !           205:        <literal>ISO-8859-1</literal>.
        !           206:    Also see the <option>-w</option> option.
        !           207:           </para>
        !           208:         </listitem>
        !           209:       </varlistentry>
        !           210: 
        !           211:       <varlistentry>
        !           212:         <term><option>-m</option></term>
        !           213:         <listitem>
        !           214:                <para>
        !           215:   Outputs some strange sort of XML file that completely
        !           216:   describes the input file, including character positions.
        !           217:   Requires <option>-d</option> to specify an output directory.
        !           218:           </para>
        !           219:         </listitem>
        !           220:       </varlistentry>
        !           221: 
        !           222:       <varlistentry>
        !           223:         <term><option>-n</option></term>
        !           224:         <listitem>
        !           225:                <para>
        !           226:   Turns on namespace processing.  (describe namespaces)
        !           227:   <option>-c</option> disables namespaces.
        !           228:           </para>
        !           229:         </listitem>
        !           230:       </varlistentry>
        !           231: 
        !           232:       <varlistentry>
        !           233:         <term><option>-p</option></term>
        !           234:         <listitem>
        !           235:                <para>
        !           236:     Tells xmlwf to process external DTDs and parameter
        !           237:     entities.
        !           238:         </para>
        !           239:         <para>
        !           240:    Normally <command>&dhpackage;</command> never parses parameter
        !           241:    entities.  <option>-p</option> tells it to always parse them.
        !           242:    <option>-p</option> implies <option>-x</option>.
        !           243:           </para>
        !           244:         </listitem>
        !           245:       </varlistentry>
        !           246: 
        !           247:       <varlistentry>
        !           248:         <term><option>-r</option></term>
        !           249:         <listitem>
        !           250:                <para>
        !           251:    Normally <command>&dhpackage;</command> memory-maps the XML file
        !           252:    before parsing; this can result in faster parsing on many
        !           253:    platforms.
        !           254:    <option>-r</option> turns off memory-mapping and uses normal file
        !           255:    IO calls instead.
        !           256:    Of course, memory-mapping is automatically turned off
        !           257:    when reading from standard input.
        !           258:           </para>
        !           259:                <para>
        !           260:    Use of memory-mapping can cause some platforms to report
        !           261:    substantially higher memory usage for
        !           262:    <command>&dhpackage;</command>, but this appears to be a matter of
        !           263:    the operating system reporting memory in a strange way; there is
        !           264:    not a leak in <command>&dhpackage;</command>.
        !           265:            </para>
        !           266:         </listitem>
        !           267:       </varlistentry>
        !           268: 
        !           269:       <varlistentry>
        !           270:         <term><option>-s</option></term>
        !           271:         <listitem>
        !           272:                <para>
        !           273:   Prints an error if the document is not standalone. 
        !           274:   A document is standalone if it has no external subset and no
        !           275:   references to parameter entities.
        !           276:           </para>
        !           277:         </listitem>
        !           278:       </varlistentry>
        !           279: 
        !           280:       <varlistentry>
        !           281:         <term><option>-t</option></term>
        !           282:         <listitem>
        !           283:                <para>
        !           284:   Turns on timings.  This tells Expat to parse the entire file,
        !           285:   but not perform any processing.
        !           286:   This gives a fairly accurate idea of the raw speed of Expat itself
        !           287:   without client overhead.
        !           288:   <option>-t</option> turns off most of the output options
        !           289:   (<option>-d</option>, <option>-m</option>, <option>-c</option>,
        !           290:   ...).
        !           291:           </para>
        !           292:         </listitem>
        !           293:       </varlistentry>
        !           294: 
        !           295:       <varlistentry>
        !           296:         <term><option>-v</option></term>
        !           297:         <listitem>
        !           298:                <para>
        !           299:   Prints the version of the Expat library being used, including some
        !           300:   information on the compile-time configuration of the library, and
        !           301:   then exits.
        !           302:           </para>
        !           303:         </listitem>
        !           304:       </varlistentry>
        !           305: 
        !           306:       <varlistentry>
        !           307:         <term><option>-w</option></term>
        !           308:         <listitem>
        !           309:                <para>
        !           310:   Enables support for Windows code pages.
        !           311:   Normally, <command>&dhpackage;</command> will throw an error if it
        !           312:   runs across an encoding that it is not equipped to handle itself.  With
        !           313:   <option>-w</option>, &dhpackage; will try to use a Windows code
        !           314:   page.  See also <option>-e</option>.
        !           315:           </para>
        !           316:         </listitem>
        !           317:       </varlistentry>
        !           318: 
        !           319:       <varlistentry>
        !           320:         <term><option>-x</option></term>
        !           321:         <listitem>
        !           322:                <para>
        !           323:   Turns on parsing external entities.
        !           324:   </para>
        !           325: <para>
        !           326:   Non-validating parsers are not required to resolve external
        !           327:   entities, or even expand entities at all.
        !           328:   Expat always expands internal entities (?),
        !           329:   but external entity parsing must be enabled explicitly.
        !           330:   </para>
        !           331:   <para>
        !           332:   External entities are simply entities that obtain their
        !           333:   data from outside the XML file currently being parsed.
        !           334:   </para>
        !           335:   <para>
        !           336:   This is an example of an internal entity:
        !           337: <literallayout>
        !           338: &lt;!ENTITY vers '1.0.2'&gt;
        !           339: </literallayout>
        !           340:   </para>
        !           341:   <para>
        !           342:   And here are some examples of external entities:
        !           343: 
        !           344: <literallayout>
        !           345: &lt;!ENTITY header SYSTEM "header-&amp;vers;.xml"&gt;  (parsed)
        !           346: &lt;!ENTITY logo SYSTEM "logo.png" PNG&gt;         (unparsed)
        !           347: </literallayout>
        !           348: 
        !           349:           </para>
        !           350:         </listitem>
        !           351:       </varlistentry>
        !           352: 
        !           353:       <varlistentry>
        !           354:         <term><option>--</option></term>
        !           355:         <listitem>
        !           356:                <para>
        !           357:     (Two hyphens.)
        !           358:     Terminates the list of options.  This is only needed if a filename
        !           359:     starts with a hyphen.  For example:
        !           360:           </para>
        !           361: <literallayout>
        !           362: &dhpackage; -- -myfile.xml
        !           363: </literallayout>
        !           364:                <para>
        !           365:     will run <command>&dhpackage;</command> on the file
        !           366:     <filename>-myfile.xml</filename>.
        !           367:           </para>
        !           368:         </listitem>
        !           369:       </varlistentry>
        !           370:     </variablelist>
        !           371: 
        !           372:        <para>
        !           373:     Older versions of <command>&dhpackage;</command> do not support
        !           374:     reading from standard input.
        !           375:        </para>
        !           376:   </refsect1>
        !           377: 
        !           378:   <refsect1>
        !           379:   <title>OUTPUT</title>
        !           380:     <para>
        !           381:        If an input file is not well-formed,
        !           382:        <command>&dhpackage;</command> prints a single line describing
        !           383:        the problem to standard output.  If a file is well-formed,
        !           384:        <command>&dhpackage;</command> outputs nothing.
        !           385:        Note that the result code is <emphasis>not</emphasis> set.
        !           386:        </para>
        !           387:   </refsect1>
        !           388:   
        !           389:   <refsect1>
        !           390:     <title>BUGS</title>
        !           391:        <para>
        !           392:        According to the W3C standard, an XML file without a
        !           393:        declaration at the beginning is not considered well-formed.
        !           394:        However, <command>&dhpackage;</command> allows this to pass.
        !           395:        </para>
        !           396:        <para>
        !           397:        <command>&dhpackage;</command> returns a 0 - noerr result,
        !           398:        even if the file is not well-formed.  There is no good way for
        !           399:        a program to use <command>&dhpackage;</command> to quickly
        !           400:        check a file -- it must parse <command>&dhpackage;</command>'s
        !           401:        standard output.
        !           402:        </para>
        !           403:        <para>
        !           404:        The errors should go to standard error, not standard output.
        !           405:        </para>
        !           406:        <para>
        !           407:        There should be a way to get <option>-d</option> to send its
        !           408:        output to standard output rather than forcing the user to send
        !           409:        it to a file.
        !           410:        </para>
        !           411:        <para>
        !           412:        I have no idea why anyone would want to use the
        !           413:        <option>-d</option>, <option>-c</option>, and
        !           414:        <option>-m</option> options.  If someone could explain it to
        !           415:        me, I'd like to add this information to this manpage.
        !           416:        </para>
        !           417:   </refsect1>
        !           418: 
        !           419:   <refsect1>
        !           420:     <title>ALTERNATIVES</title>
        !           421:        <para>
        !           422:          Here are some XML validators on the web:
        !           423: 
        !           424: <literallayout>
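        !           425: http://www.hcrc.ed.ac.uk/~richard/xml-check.html
        !           426: http://www.stg.brown.edu/service/xmlvalid/
        !           427: http://www.scripting.com/frontier5/xml/code/xmlValidator.html
        !           428: http://www.xml.com/pub/a/tools/ruwf/check.html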
        !           429: </literallayout>
        !           430: 
        !           431:                 </para>
        !           432:   </refsect1>
        !           433: 
        !           434:   <refsect1>
        !           435:     <title>SEE ALSO</title>
        !           436:        <para>
        !           437: 
        !           438: <literallayout>
        !           439: The Expat home page:        http://www.libexpat.org/
        !           440: The W3 XML specification:   http://www.w3.org/TR/REC-xml
        !           441: </literallayout>
        !           442: 
        !           443:        </para>
        !           444:   </refsect1>
        !           445: 
        !           446:   <refsect1>
        !           447:     <title>AUTHOR</title>
        !           448:     <para>
        !           449:          This manual page was written by &dhusername; &dhemail; for
        !           450:       the &debian; system (but may be used by others).  Permission is
        !           451:       granted to copy, distribute and/or modify this document under
        !           452:       the terms of the <acronym>GNU</acronym> Free Documentation
        !           453:       License, Version 1.1.
        !           454:        </para>
        !           455:   </refsect1>
        !           456: </refentry>
        !           457: 
