File:  [ELWIX - Embedded LightWeight unIX -] / embedaddon / expat / doc / xmlwf.sgml
Revision 1.1.1.2 (vendor branch): download - view: text, annotated - select for diffs - revision graph
Sun Jun 15 16:09:43 2014 UTC (10 years, 1 month ago) by misho
Branches: expat, MAIN
CVS tags: v2_1_0, HEAD
expat 2.1.0

    1: <!doctype refentry PUBLIC "-//OASIS//DTD DocBook V4.1//EN" [
    2: 
    3: <!-- Process this file with docbook-to-man to generate an nroff manual
    4:      page: `docbook-to-man manpage.sgml > manpage.1'.  You may view
    5:      the manual page with: `docbook-to-man manpage.sgml | nroff -man |
    6:      less'.  A typical entry in a Makefile or Makefile.am is:
    7: 
    8: manpage.1: manpage.sgml
    9: 	docbook-to-man $< > $@
   10:   -->
   11: 
   12:   <!-- Fill in your name for FIRSTNAME and SURNAME. -->
   13:   <!ENTITY dhfirstname "<firstname>Scott</firstname>">
   14:   <!ENTITY dhsurname   "<surname>Bronson</surname>">
   15:   <!-- Please adjust the date whenever revising the manpage. -->
   16:   <!ENTITY dhdate      "<date>December  5, 2001</date>">
   17:   <!-- SECTION should be 1-8, maybe w/ subsection; other parameters are
   18:        allowed: see man(7), man(1). -->
   19:   <!ENTITY dhsection   "<manvolnum>1</manvolnum>">
   20:   <!ENTITY dhemail     "<email>bronson@rinspin.com</email>">
   21:   <!ENTITY dhusername  "Scott Bronson">
   22:   <!ENTITY dhucpackage "<refentrytitle>XMLWF</refentrytitle>">
   23:   <!ENTITY dhpackage   "xmlwf">
   24: 
   25:   <!ENTITY debian      "<productname>Debian GNU/Linux</productname>">
   26:   <!ENTITY gnu         "<acronym>GNU</acronym>">
   27: ]>
   28: 
   29: <refentry>
   30:   <refentryinfo>
   31:     <address>
   32:       &dhemail;
   33:     </address>
   34:     <author>
   35:       &dhfirstname;
   36:       &dhsurname;
   37:     </author>
   38:     <copyright>
   39:       <year>2001</year>
   40:       <holder>&dhusername;</holder>
   41:     </copyright>
   42:     &dhdate;
   43:   </refentryinfo>
   44:   <refmeta>
   45:     &dhucpackage;
   46: 
   47:     &dhsection;
   48:   </refmeta>
   49:   <refnamediv>
   50:     <refname>&dhpackage;</refname>
   51: 
   52:     <refpurpose>Determines if an XML document is well-formed</refpurpose>
   53:   </refnamediv>
   54:   <refsynopsisdiv>
   55:     <cmdsynopsis>
   56:       <command>&dhpackage;</command>
   57: 	  <arg><option>-s</option></arg>
   58: 	  <arg><option>-n</option></arg>
   59: 	  <arg><option>-p</option></arg>
   60: 	  <arg><option>-x</option></arg>
   61: 
   62: 	  <arg><option>-e <replaceable>encoding</replaceable></option></arg>
   63: 	  <arg><option>-w</option></arg>
   64: 
   65: 	  <arg><option>-d <replaceable>output-dir</replaceable></option></arg>
   66: 	  <arg><option>-c</option></arg>
   67: 	  <arg><option>-m</option></arg>
   68: 
   69: 	  <arg><option>-r</option></arg>
   70: 	  <arg><option>-t</option></arg>
   71: 
   72: 	  <arg><option>-v</option></arg>
   73: 
   74: 	  <arg>file ...</arg>
   75:     </cmdsynopsis>
   76:   </refsynopsisdiv>
   77:  
   78:   <refsect1>
   79:     <title>DESCRIPTION</title>
   80: 
   81:     <para>
   82: 	<command>&dhpackage;</command> uses the Expat library to
   83: 	determine if an XML document is well-formed.  It is
   84: 	non-validating.
   85: 	</para>
   86: 
   87: 	<para>
   88: 	If you do not specify any files on the command line, and you
   89: 	have a recent version of <command>&dhpackage;</command>, the
   90: 	input file will be read from standard input.
   91: 	</para>
   92: 
   93:   </refsect1>
   94: 
   95:   <refsect1>
   96:     <title>WELL-FORMED DOCUMENTS</title>
   97: 
   98: 	<para>
   99: 	  A well-formed document must adhere to the
  100: 	  following rules:
  101: 	</para>
  102: 
  103: 	<itemizedlist>
  104:       <listitem><para>
  105: 	    The file begins with an XML declaration.  For instance,
  106: 		<literal>&lt;?xml version="1.0" standalone="yes"?&gt;</literal>.
  107: 		<emphasis>NOTE:</emphasis>
  108: 		<command>&dhpackage;</command> does not currently
  109: 		check for a valid XML declaration.
  110:       </para></listitem>
  111:       <listitem><para>
  112: 		Every start tag is either empty (&lt;tag/&gt;)
  113: 		or has a corresponding end tag.
  114:       </para></listitem>
  115:       <listitem><para>
  116: 	    There is exactly one root element.  This element must contain
  117: 		all other elements in the document.  Only comments, white
  118: 		space, and processing instructions may come after the close
  119: 		of the root element.
  120:       </para></listitem>
  121:       <listitem><para>
  122: 		All elements nest properly.
  123:       </para></listitem>
  124:       <listitem><para>
  125: 		All attribute values are enclosed in quotes (either single
  126: 		or double).
  127:       </para></listitem>
  128:     </itemizedlist>
  129: 
  130: 	<para>
  131: 	  If the document has a DTD, and it strictly complies with that
  132: 	  DTD, then the document is also considered <emphasis>valid</emphasis>.
  133: 	  <command>&dhpackage;</command> is a non-validating parser --
  134: 	  it does not check the DTD.  However, it does support
  135: 	  external entities (see the <option>-x</option> option).
  136: 	</para>
  137:   </refsect1>
  138: 
  139:   <refsect1>
  140:     <title>OPTIONS</title>
  141: 
  142: <para>
  143: When an option includes an argument, you may specify the argument either
  144: separately ("<option>-d</option> output") or concatenated with the
  145: option ("<option>-d</option>output").  <command>&dhpackage;</command>
  146: supports both.
  147: </para>
  148: 
  149:     <variablelist>
  150: 
  151:       <varlistentry>
  152:         <term><option>-c</option></term>
  153:         <listitem>
  154: 		<para>
  155:   If the input file is well-formed and <command>&dhpackage;</command>
  156:   doesn't encounter any errors, the input file is simply copied to
  157:   the output directory unchanged.
  158:   This implies no namespaces (turns off <option>-n</option>) and
  159:   requires <option>-d</option> to specify an output directory.
  160:   		</para>
  161:         </listitem>
  162:       </varlistentry>
  163: 
  164:       <varlistentry>
  165:         <term><option>-d output-dir</option></term>
  166:         <listitem>
  167: 		<para>
  168:   Specifies a directory to contain transformed
  169:   representations of the input files.
  170:   By default, <option>-d</option> outputs a canonical representation
  171:   (described below).
  172:   You can select different output formats using <option>-c</option>
  173:   and <option>-m</option>.
  174: 	  </para>
  175: 	  <para>
  176:   The output filenames will
  177:   be exactly the same as the input filenames or "STDIN" if the input is
  178:   coming from standard input.  Therefore, you must be careful that the
  179:   output file does not go into the same directory as the input
  180:   file.  Otherwise, <command>&dhpackage;</command> will delete the
  181:   input file before it generates the output file (just like running
  182:   <literal>cat &lt; file &gt; file</literal> in most shells).
  183: 	  </para>
  184: 	  <para> 
  185:   Two structurally equivalent XML documents have a byte-for-byte
  186:   identical canonical XML representation.
  187:   Note that ignorable white space is considered significant and
  188:   is treated equivalently to data.
  189:   More on canonical XML can be found at
  190:   http://www.jclark.com/xml/canonxml.html .
  191: 	  </para>
  192:         </listitem>
  193:       </varlistentry>
  194: 
  195:       <varlistentry>
  196:         <term><option>-e encoding</option></term>
  197:         <listitem>
  198: 		<para>
  199:    Specifies the character encoding for the document, overriding
  200:    any document encoding declaration.  <command>&dhpackage;</command>
  201:    supports four built-in encodings:
  202:    	<literal>US-ASCII</literal>,
  203: 	<literal>UTF-8</literal>,
  204: 	<literal>UTF-16</literal>, and
  205: 	<literal>ISO-8859-1</literal>.
  206:    Also see the <option>-w</option> option.
  207: 	   </para>
  208:         </listitem>
  209:       </varlistentry>
  210: 
  211:       <varlistentry>
  212:         <term><option>-m</option></term>
  213:         <listitem>
  214: 		<para>
  215:   Outputs some strange sort of XML file that completely
  216:   describes the input file, including character positions.
  217:   Requires <option>-d</option> to specify an output directory.
  218: 	   </para>
  219:         </listitem>
  220:       </varlistentry>
  221: 
  222:       <varlistentry>
  223:         <term><option>-n</option></term>
  224:         <listitem>
  225: 		<para>
  226:   Turns on namespace processing, so that namespace declarations and prefixed names are interpreted as described by the Namespaces in XML recommendation.
  227:   <option>-c</option> disables namespaces.
  228: 	   </para>
  229:         </listitem>
  230:       </varlistentry>
  231: 
  232:       <varlistentry>
  233:         <term><option>-p</option></term>
  234:         <listitem>
  235: 		<para>
  236:     Tells <command>&dhpackage;</command> to process external DTDs and parameter
  237:     entities.
  238: 	 </para>
  239: 	 <para>
  240:    Normally <command>&dhpackage;</command> never parses parameter
  241:    entities.  <option>-p</option> tells it to always parse them.
  242:    <option>-p</option> implies <option>-x</option>.
  243: 	   </para>
  244:         </listitem>
  245:       </varlistentry>
  246: 
  247:       <varlistentry>
  248:         <term><option>-r</option></term>
  249:         <listitem>
  250: 		<para>
  251:    Normally <command>&dhpackage;</command> memory-maps the XML file
  252:    before parsing; this can result in faster parsing on many
  253:    platforms.
  254:    <option>-r</option> turns off memory-mapping and uses normal file
  255:    IO calls instead.
  256:    Of course, memory-mapping is automatically turned off
  257:    when reading from standard input.
  258: 	   </para>
  259: 		<para>
  260:    Use of memory-mapping can cause some platforms to report
  261:    substantially higher memory usage for
  262:    <command>&dhpackage;</command>, but this appears to be a matter of
  263:    the operating system reporting memory in a strange way; there is
  264:    not a leak in <command>&dhpackage;</command>.
  265:            </para>
  266:         </listitem>
  267:       </varlistentry>
  268: 
  269:       <varlistentry>
  270:         <term><option>-s</option></term>
  271:         <listitem>
  272: 		<para>
  273:   Prints an error if the document is not standalone. 
  274:   A document is standalone if it has no external subset and no
  275:   references to parameter entities.
  276: 	   </para>
  277:         </listitem>
  278:       </varlistentry>
  279: 
  280:       <varlistentry>
  281:         <term><option>-t</option></term>
  282:         <listitem>
  283: 		<para>
  284:   Turns on timings.  This tells Expat to parse the entire file,
  285:   but not perform any processing.
  286:   This gives a fairly accurate idea of the raw speed of Expat itself
  287:   without client overhead.
  288:   <option>-t</option> turns off most of the output options
  289:   (<option>-d</option>, <option>-m</option>, <option>-c</option>,
  290:   ...).
  291: 	   </para>
  292:         </listitem>
  293:       </varlistentry>
  294: 
  295:       <varlistentry>
  296:         <term><option>-v</option></term>
  297:         <listitem>
  298: 		<para>
  299:   Prints the version of the Expat library being used, including some
  300:   information on the compile-time configuration of the library, and
  301:   then exits.
  302: 	   </para>
  303:         </listitem>
  304:       </varlistentry>
  305: 
  306:       <varlistentry>
  307:         <term><option>-w</option></term>
  308:         <listitem>
  309: 		<para>
  310:   Enables support for Windows code pages.
  311:   Normally, <command>&dhpackage;</command> will throw an error if it
  312:   runs across an encoding that it is not equipped to handle itself.  With
  313:   <option>-w</option>, <command>&dhpackage;</command> will try to use a Windows code
  314:   page.  See also <option>-e</option>.
  315: 	   </para>
  316:         </listitem>
  317:       </varlistentry>
  318: 
  319:       <varlistentry>
  320:         <term><option>-x</option></term>
  321:         <listitem>
  322: 		<para>
  323:   Turns on parsing external entities.
  324:   </para>
  325: <para>
  326:   Non-validating parsers are not required to resolve external
  327:   entities, or even expand entities at all.
  328:   Expat always expands internal entities (?),
  329:   but external entity parsing must be enabled explicitly.
  330:   </para>
  331:   <para>
  332:   External entities are simply entities that obtain their
  333:   data from outside the XML file currently being parsed.
  334:   </para>
  335:   <para>
  336:   This is an example of an internal entity:
  337: <literallayout>
  338: &lt;!ENTITY vers '1.0.2'&gt;
  339: </literallayout>
  340:   </para>
  341:   <para>
  342:   And here are some examples of external entities:
  343: 
  344: <literallayout>
  345: &lt;!ENTITY header SYSTEM "header-&amp;vers;.xml"&gt;  (parsed)
  346: &lt;!ENTITY logo SYSTEM "logo.png" PNG&gt;         (unparsed)
  347: </literallayout>
  348: 
  349: 	   </para>
  350:         </listitem>
  351:       </varlistentry>
  352: 
  353:       <varlistentry>
  354:         <term><option>--</option></term>
  355:         <listitem>
  356: 		<para>
  357:     (Two hyphens.)
  358:     Terminates the list of options.  This is only needed if a filename
  359:     starts with a hyphen.  For example:
  360: 	   </para>
  361: <literallayout>
  362: &dhpackage; -- -myfile.xml
  363: </literallayout>
  364: 		<para>
  365:     will run <command>&dhpackage;</command> on the file
  366:     <filename>-myfile.xml</filename>.
  367: 	   </para>
  368:         </listitem>
  369:       </varlistentry>
  370:     </variablelist>
  371: 
  372: 	<para>
  373:     Older versions of <command>&dhpackage;</command> do not support
  374:     reading from standard input.
  375: 	</para>
  376:   </refsect1>
  377: 
  378:   <refsect1>
  379:   <title>OUTPUT</title>
  380:     <para>
  381: 	If an input file is not well-formed,
  382: 	<command>&dhpackage;</command> prints a single line describing
  383: 	the problem to standard output.  If a file is well-formed,
  384: 	<command>&dhpackage;</command> outputs nothing.
  385: 	Note that the result code is <emphasis>not</emphasis> set.
  386: 	</para>
  387:   </refsect1>
  388:   
  389:   <refsect1>
  390:     <title>BUGS</title>
  391: 	<para>
  392: 	<command>&dhpackage;</command> returns a 0 - noerr result,
  393: 	even if the file is not well-formed.  There is no good way for
  394: 	a program to use <command>&dhpackage;</command> to quickly
  395: 	check a file -- it must parse <command>&dhpackage;</command>'s
  396: 	standard output.
  397: 	</para>
  398: 	<para>
  399: 	The errors should go to standard error, not standard output.
  400: 	</para>
  401: 	<para>
  402: 	There should be a way to get <option>-d</option> to send its
  403: 	output to standard output rather than forcing the user to send
  404: 	it to a file.
  405: 	</para>
  406: 	<para>
  407: 	I have no idea why anyone would want to use the
  408: 	<option>-d</option>, <option>-c</option>, and
  409: 	<option>-m</option> options.  If someone could explain it to
  410: 	me, I'd like to add this information to this manpage.
  411: 	</para>
  412:   </refsect1>
  413: 
  414:   <refsect1>
  415:     <title>ALTERNATIVES</title>
  416: 	<para>
  417: 	  Here are some XML validators on the web:
  418: 
  419: <literallayout>
  420: http://www.hcrc.ed.ac.uk/~richard/xml-check.html
  421: http://www.stg.brown.edu/service/xmlvalid/
  422: http://www.scripting.com/frontier5/xml/code/xmlValidator.html
  423: http://www.xml.com/pub/a/tools/ruwf/check.html
  424: </literallayout>
  425: 
  426: 		 </para>
  427:   </refsect1>
  428: 
  429:   <refsect1>
  430:     <title>SEE ALSO</title>
  431: 	<para>
  432: 
  433: <literallayout>
  434: The Expat home page:        http://www.libexpat.org/
  435: The W3 XML specification:   http://www.w3.org/TR/REC-xml
  436: </literallayout>
  437: 
  438: 	</para>
  439:   </refsect1>
  440: 
  441:   <refsect1>
  442:     <title>AUTHOR</title>
  443:     <para>
  444: 	  This manual page was written by &dhusername; &dhemail; for
  445:       the &debian; system (but may be used by others).  Permission is
  446:       granted to copy, distribute and/or modify this document under
  447:       the terms of the <acronym>GNU</acronym> Free Documentation
  448:       License, Version 1.1.
  449: 	</para>
  450:   </refsect1>
  451: </refentry>
  452: 
  453: <!-- Keep this comment at the end of the file
  454: Local variables:
  455: mode: sgml
  456: sgml-omittag:t
  457: sgml-shorttag:t
  458: sgml-minimize-attributes:nil
  459: sgml-always-quote-attributes:t
  460: sgml-indent-step:2
  461: sgml-indent-data:t
  462: sgml-parent-document:nil
  463: sgml-default-dtd-file:nil
  464: sgml-exposed-tags:nil
  465: sgml-local-catalogs:nil
  466: sgml-local-ecat-files:nil
  467: End:
  468: -->

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>