Annotation of gpl/axl/src/axl.c, revision 1.1.1.1

1.1       misho       1: /*
                      2:  *  @internal
                      3:  * 
                      4:  *  LibAxl:  Another XML library 
                      5:  *  Copyright (C) 2006 Advanced Software Production Line, S.L.
                      6:  *
                      7:  *  This program is free software; you can redistribute it and/or
                      8:  *  modify it under the terms of the GNU Lesser General Public License
                      9:  *  as published by the Free Software Foundation; either version 2.1 of
                     10:  *  the License, or (at your option) any later version.
                     11:  *
                     12:  *  This program is distributed in the hope that it will be useful,
                     13:  *  but WITHOUT ANY WARRANTY; without even the implied warranty of 
                     14:  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the  
                     15:  *  GNU Lesser General Public License for more details.
                     16:  *
                     17:  *  You should have received a copy of the GNU Lesser General Public
                     18:  *  License along with this program; if not, write to the Free
                     19:  *  Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
                     20:  *  02111-1307 USA
                     21:  *  
                     22:  *  You may find a copy of the license under this software is released
                     23:  *  at COPYING file. This is LGPL software: you are welcome to
                     24:  *  develop proprietary applications using this library without any
                     25:  *  royalty or fee but returning back any change, improvement or
                     26:  *  addition in the form of source code, project image, documentation
                     27:  *  patches, etc. 
                     28:  *
                     29:  *  For commercial support on build XML enabled solutions contact us:
                     30:  *          
                     31:  *      Postal address:
                     32:  *         Advanced Software Production Line, S.L.
                     33:  *         Edificio Alius A, Oficina 102,
                     34:  *         C/ Antonio Suarez Nº 10,
                     35:  *         Alcalá de Henares 28802 Madrid
                     36:  *         Spain
                     37:  *
                     38:  *      Email address:
                     39:  *         info@aspl.es - http://www.aspl.es/xml
                     40:  */
                     41: #include <axl.h>
                     42: 
                     43: /** 
                     44:  * \mainpage AXL: Another XML Library implementation (XML 1.0 standard)
                     45:  *
                     46:  * \section intro Introduction
                     47:  *
                     48:  * AXL is a library which aims to implement the XML 1.0 standard, as
                     49:  * defined at the XML 1.0 third edition
                     50:  * recommendation found at: http://www.w3.org/TR/REC-xml/. 
                     51:  * 
                     52:  * It was implemented to support XML requirements inside projects
                     53:  * developed by <a href="http://www.aspl.es">Advanced Software
                     54:  * Production Line, S.L.</a>, especially <a
                     55:  * href="http://fact.aspl.es">Af-Arch</a> and <a
                     56:  * href="http://vortex.aspl.es">Vortex Library</a>, which are already
                     57:  * using the library successfully in production environments.
                     58:  * 
                     59:  * Some features this library has are:
                     60:  *
                     61:  * <ul>
                     62:  * 
                     63:  * <li><b>A clean implementation</b>, that only includes those elements
                     64:  * defined, and only those, inside the XML 1.0 standard, as defined in <a href="http://www.w3.org/TR/REC-xml/">the third edition</a>.</li>
                     65:  *
                     66:  * <li><b>Fast and memory efficient</b> implementation. If you still
                     67:  * think that XML is slow and memory inefficient, you haven't tried Axl
                     68:  * Library. You'll find reports about its performance and memory usage at: http://www.aspl.es/xml/doc.html </li>
                     69:  *
                     70:  * <li>Ensure that the library is implemented using abstract data types,
                     71:  * commonly known as opaque types, to avoid exposing details to the
                     72:  * user space code that is consuming the API. 
                     73:  * This will allow us to improve the library without breaking existing
                     74:  * code and, the most important, to not be limited by details already
                     75:  * exposed.
                     76:  * In fact, this design has already been used to improve the library
                     77:  * performance greatly. See reports found at: <a href="http://www.aspl.es/xml/doc.html">http://www.aspl.es/xml/doc.html</a>.</li>
                     78:  *
                     79:  * <li><b>Small and efficient</b>, ensuring on every stable release that
                     80:  * the library does not leak, not only while using the library in a
                     81:  * proper manner but also when errors are found. </li>
                     82:  *
                     83:  * <li>Have a <b>modular</b> design that allows to use only those
                     84:  * elements required by your software. At this moment there is a
                     85:  * base library (which implements XML 1.0) and optional libraries that
                     86:  * provide additional features to support XML Namespaces 1.0 and
                     87:  * extended encodings.</li>
                     88:  *
                     89:  * <li>To have a <b>small footprint</b> is a requirement. Currently LibAxl (118K) and LibAxl-Ns (9K)</li>
                     90:  *
                     91:  * <li><b>Bindings</b> for other languages. At this moment <b>PyAxl</b>, a binding for the Python Language, is officially supported.</li>
                     92:  * </ul>
                     93:  *
                     94:  * \section features_and_status What is the status of the library
                     95:  *
                     96:  * Currently the library is stable and it is known to work under
                     97:  * GNU/Linux and Windows with a really good performance. See reports found at http://www.aspl.es/xml/doc.html to know more about this.
                     98:  *
                     99:  * The library already covers 95% of the common requirements that XML
                    100:  * development needs. Among others, it supports:
                    101:  *
                    102:  *  - XML tree parsing, from memory and files, allowing a great level
                    103:  * of detail while accessing to the data (comments, process
                    104:  * instructions, xml nodes, and content).
                    105:  * 
                    106:  * - Mostly completed DTD validation support, including <!ATTLIST>, <!ENTITY>
                    107:  * and <!ELEMENT> elements. Remains to implement NOTATION declarations
                    108:  * and full entity replacement.
                    109:  *
                    110:  *  - Two modes to inspect the xml documents at the same time,
                    111:  *  <b>MIXED API</b>: an API to traverse the document allowing access
                    112:  *  to all items found (\ref axlItem) inside the document (\ref
                    113:  *  axlDoc) root node (\ref axlNode), and <b>CHILDREN API</b>: an API
                    114:  *  that allows to traverse the node using as reference only the nodes
                    115:  *  (\ref axlNode) inside the document (\ref axlDoc).
                    116:  *
                    117:  *  - <a href="http://www.w3.org/TR/REC-xml-names/">XML 1.0
                    118:  *  Namespaces</a> full support, through the additional component
                    119:  *  (<b>libaxl-ns</b>), allowing to produce xml applications that are
                    120:  *  XML Namespace aware. 
                    121:  *
                    122:  *  - Support for extended encodings (<b>libaxl-babel</b>), beyond default encoding utf-8. 
                    123:  * 
                    124:  * \section documentation Library Documentation
                    125:  *
                    126:  * The library documentation is composed into two pieces. The Axl
                    127:  * manual and the API documentation. 
                    128:  * 
                    129:  * - \ref axl_install
                    130:  * - \ref axl_manual
                    131:  * - \ref axl_api
                    132:  * - \ref axl_knife_manual
                    133:  * 
                    134:  * \section contact_us Contact us
                    135:  * 
                    136:  * If you find something not properly documented, or some question is
                    137:  * not answered on this documentation, check the <a href="http://www.aspl.es/axl/doc.html">mailing list</a>.
                    138:  *
                    139:  * You can also contact us if you have patches, improvements or
                    140:  * suggestions at the mailing list.
                    141:  */
                    142: 
                    143: /** 
                    144:  * \page axl_manual XML development with Axl Library
                    145:  *
                    146:  * \section Manual Index
                    147:  *
                    148:  * In this manual you'll find the following sections:
                    149:  * 
                    150:  * <b>Section 1: Basic elements to understand XML and Axl </b><br>
                    151:  *
                    152:  * - \ref intro
                    153:  * - \ref concepts
                    154:  * - \ref two_apis
                    155:  *
                    156:  * <b>Section 2: Manipulating and producing XML documents </b><br>
                    157:  *
                    158:  * - \ref parsing
                    159:  * - \ref iterating
                    160:  * - \ref modifying
                    161:  * - \ref dumping_functions
                    162:  *
                    163:  * <b>Section 3: Doing validation on your documents </b><br>
                    164:  *
                    165:  * - \ref validation
                    166:  * - \ref xml_namespace
                    167:  * - \ref using_axl_babel
                    168:  *
                    169:  * <b>Section 4: Advanced topics </b><br>
                    170:  *
                    171:  * - \ref reducing_foot_print
                    172:  *
                    173:  * <b>Appendix</b><br>
                    174:  *
                    175:  * - \ref futher
                    176:  *
                    177:  *
                    178:  * \section intro Introduction: XML development
                    179:  * 
                    180:  * XML 1.0 definition allows to build documents that could be used to
                    181:  * represent textual information, remote procedure invocations or
                    182:  * dynamic user interfaces. Its definition is based on very simple
                    183:  * principles, that allow developers to compose them to create
                    184:  * bigger abstractions that are found in nearly every place in modern
                    185:  * computer software design.
                    186:  *
                    187:  * It is a "quite" human readable format, so you will find that it is not
                    188:  * the best format if you are looking for space efficiency. What XML
                    189:  * 1.0 provides you on the other hand is the ability to quickly
                    190:  * prototype and produce working formats that encapsulate your data,
                    191:  * and, as your system evolves, XML 1.0 will do it with you.
                    192:  *
                    193:  * Among other things, XML 1.0 provides you ways to validate your
                    194:  * documents to ensure your code will read XML documents in the format
                    195:  * expected, reducing the time and development cost due to additional
                    196:  * checkings required.
                    197:  *
                    198:  * Before continuing, we will explain some concepts that are required
                    199:  * to understand XML 1.0 and why the Axl API was built this way.
                    200:  *
                    201:  * \section concepts Some concepts before starting to use Axl Library
                    202:  *
                    203:  * Here is a simple example of a XML 1.0 document:
                    204:  * <div class="xml-doc">
                    205:  * \code
                    206:  * <?xml version="1.0" ?>
                    207:  * <!-- This is a comment -->
                    208:  * <complex>
                    209:  *   <data>
                    210:  *     <simple>10</simple>
                    211:  *     <empty attr1="value1" />
                    212:  *   </data>
                    213:  * </complex>
                    214:  * \endcode
                    215:  * </div>
                    216:  *
                    217:  * Previous XML document represents a structure with a top level
                    218:  * node, called <b>complex</b>, that has one single child called
                    219:  * <b>data</b> which in turn has two children. The first one is the
                    220:  * child called <b>simple</b> that has content and the other one, called
                    221:  * <b>empty</b>, is a node usually called an empty xml node.
                    222:  *
                    223:  * The XML representation for previous document is the following:
                    224:  * \image html image01.png "Document representation"
                    225:  *
                    226:  * Several issues must be considered while interpreting previous
                    227:  * diagram and how Axl library parse and expose those elements through
                    228:  * the API to the client application:
                    229:  * <ul>
                    230:  * 
                    231:  * <li>Every XML document has a root node (\ref axl_doc_get_root). Without exception. In this
                    232:  * case, the root node for our example is <b>complex</b>. </li>
                    233:  * 
                    234:  * <li>If a node has content, that content is not represented with
                    235:  * another node. That content is associated to the node and could be
                    236:  * retrieved using several functions (\ref axl_node_get_content, \ref
                    237:  * axl_node_get_content_copy and \ref
                    238:  * axl_node_get_content_trans). 
                    239:  * 
                    240:  * Alternatively, while using the MIXED API, you can traverse child
                    241:  * items stored for a particular node, detecting those items that are
                    242:  * \ref ITEM_CONTENT or \ref ITEM_CDATA (using \ref
                    243:  * axl_item_get_type). </li>
                    244:  *
                    245:  * <li>Having a node (\ref axlNode) with content doesn't mean to have a node with
                    246:  * childs. The child notion is only about having more xml nodes (\ref axlNode) as
                    247:  * childs. 
                    248:  * 
                    249:  * This is particularly important if you take into consideration that a
                    250:  * node could have content (\ref ITEM_CONTENT), comments (\ref
                    251:  * ITEM_COMMENT), application process instructions (\ref ITEM_PI),
                    252:  * CDATA content (uninterpreted content \ref ITEM_CDATA), all of them
                    253:  * mixed with more xml nodes (\ref ITEM_NODE). </li>
                    254:  *
                    255:  * <li>A final node which is empty because it doesn't have content or
                    256:  * childs, is usually referred to as <b>EMPTY</b> type node. A final
                    257:  * node with content but no childs is usually referred to as
                    258:  * <b>PCDATA</b>. A node that has content mixed with references to
                    259:  * more child xml nodes is referred to as <b>MIXED</b>.</li>
                    260:  *
                    261:  * <li>At the <b>empty</b> node, you'll find that it has an attribute
                    262:  * called <b>attr1</b> with a value <b>value1</b>. A node could have
                    263:  * any number of attributes but each one must be named
                    264:  * differently. Again, if a node is empty, it remains empty even if it
                    265:  * has attributes.
                    266:  * </li>
                    267:  * </ul>
                    268:  * So, to summarize, we have a root node, that could contain more
                    269:  * nodes, that could contain PCDATA, or content, and those nodes could
                    270:  * contain named attributes with values.
                    271:  *
                    272:  * \section two_apis MIXED and CHILDREN API: How to use them
                    273:  *
                    274:  * XML 1.0 is used for a variety of purposes, some of them require
                    275:  * the CHILDREN API and the rest the MIXED API. To <i>require</i>, we
                    276:  * mean that it fits better, so you will get better results, your
                    277:  * application will react in a proper manner and you'll have to do
                    278:  * less work.
                    279:  *
                    280:  * The reason for this API is simple. XML 1.0 definition allows to mix
                    281:  * content with more nodes, comments and many more elements to be
                    282:  * placed as childs for a particular node. 
                    283:  * 
                    284:  * This definition, found at the standard, has moved many XML
                    285:  * implementations to support only an API that supports all these
                    286:  * features, that is, an interface that is complicated and overloaded,
                    287:  * that gives you a power that you don't require, making your
                    288:  * development more inefficient.
                    289:  *
                    290:  * In many cases, however, a developer only requires a usual form of xml,
                    291:  * called CHILDREN, in which nodes have either other child
                    292:  * nodes or content, but not both at the same time. This kind of xml is
                    293:  * really useful, easy to parse, easy to make a DTD definition, more
                    294:  * compact and extensible.
                    295:  *
                    296:  * Let's see an example for both formats to clarify:
                    297:  * 
                    298:  * <div class="xml-doc">
                    299:  * \code
                    300:  * <?xml version='1.0' ?>
                    301:  * <document>
                    302:  *    <!-- Children XML format example: as you can see      -->
                    303:  *    <!-- nodes only contains either nodes or node content -->
                    304:  *    <!-- but nothing mixed at the same level              -->
                    305:  *    <node1> 
                    306:  *       This is node1 content 
                    307:  *    </node1>
                    308:  *    <node2>
                    309:  *      <node3>
                    310:  *         This is node3 content
                    311:  *      </node3>
                    312:  *      <node4 />
                    313:  *    </node2>
                    314:  * </document>
                    315:  * \endcode
                    316:  * </div>
                    317:  *
                    318:  * While a MIXED xml document could be:
                    319:  * 
                    320:  * <div class="xml-doc">
                    321:  * \code
                    322:  * <?xml version='1.0' ?>
                    323:  * <document>
                    324:  *    <!-- Children XML format example: as you can see      -->
                    325:  *    <!-- nodes only contains either nodes or node content -->
                    326:  *    <!-- but nothing mixed at the same level              -->
                    327:  *    <node1> 
                    328:  *       This is node1 content 
                    329:  *    </node1>
                    330:  *    Content mixed with xml nodes at the same level. 
                    331:  *    <node2>
                    332:  *      More content....
                    333:  *      <node3>
                    334:  *         This is node3 content
                    335:  *      </node3>
                    336:  *      <node4 />
                    337:  *    </node2>
                    338:  * </document>
                    339:  * \endcode
                    340:  * </div>
                    341:  *
                    342:  * Both approaches, which are valid using the XML 1.0 standard, are
                    343:  * appropriate for particular situations:
                    344:  * 
                    345:  * - CHILDREN API: compact representations, configuration files, rpc
                    346:  * invocation description, graphical user interface definition.
                    347:  * - MIXED API: textual description, for example: XSL-FO.
                    348:  *
                    349:  * Having introduced the context of the problem, Axl Library takes no
                    350:  * position, providing an API that fits while developing xml content
                    351:  * that follows a CHILDREN description and an API for the MIXED
                    352:  * description.
                    353:  * 
                    354:  * In this context, which API you use, will only affect to the way you
                    355:  * traverse the document. The CHILDREN API is mainly provided by the
                    356:  * \ref axl_node_module "Axl Node interface" and the MIXED API is
                    357:  * mainly provided by the \ref axl_item_module "Axl Item interface".
                    358:  *
                    359:  * You don't need to do any special operation to activate both APIs,
                    360:  * both are provided at the same time. Let's see an example:
                    361:  *
                    362:  * Supposing the previous mixed example, the following code will get
                    363:  * access to the &lt;node2> reference:
                    364:  * \code
                    365:  * // supposing "doc" reference contains the document loaded
                    366:  * axlNode * node;
                    367:  * 
                    368:  * // get the document root, that is <document>
                    369:  * node = axl_doc_get_root (doc);
                    370:  *
                    371:  * // get the first child for the document root (<node1>)
                    372:  * node = axl_node_get_first_child (node);
                    373:  *
                    374:  * // get the next child (brother of <node1>, that is <node2>)
                    375:  * node = axl_node_get_next (node);
                    376:  * \endcode
                    377:  *
                    378:  * However, with the MIXED API you can get every detail, every item
                    379:  * found for a particular node. This is how:
                    380:  * 
                    381:  * \code
                    382:  * // supposing "doc" reference contains the document loaded
                    383:  * axlNode * node;
                    384:  * axlItem * item;
                    385:  * 
                    386:  * // get the document root, that is <document>
                    387:  * node = axl_doc_get_root (doc);
                    388:  *
                    389:  * // get the first item child for the document root that is the comment:
                    390:  * //    "Children XML format example: as you can see".
                    391:  * item = axl_item_get_first_child (node);
                    392:  *
                    393:  * // now skip the following two comments
                    394:  * item = axl_item_get_next (item);
                    395:  * item = axl_item_get_next (item);
                    396:  *
                    397:  * // now the next item is holding the <node1>
                    398:  * item = axl_item_get_next (item);
                    399:  * node = axl_item_get_data (item);
                    400:  *
                    401:  * // now get the content between the <node1> and <node2>
                    402:  * item = axl_item_get_next (item);
                    403:  *
                    404:  * // and finally, get the next child (brother of <node1>, that is
                    405:  * // <node2>)
                    406:  * item = axl_item_get_next (item);
                    407:  * node = axl_item_get_data (item);
                    408:  * \endcode
                    409:  *
                    410:  * Obviously, the mixed example contains more code and it is more
                    411:  * fragile to xml document changes. The problem is that the MIXED API
                    412:  * is more general than the CHILDREN, making XML libraries to only
                    413:  * provide that API.
                    414:  *
                    415:  * As a consequence:
                    416:  *
                    417:  * - You only need to use the MIXED API (\ref axlItem) if you are
                    418:  * going to do an xml application that allows having content mixed
                    419:  * with nodes, comments, etc, and you want to get access to such
                    420:  * content.
                    421:  *
                    422:  * - If you are planning to develop an XML solution that represents
                    423:  * information (user interfaces), module descriptions, configuration
                    424:  * files, etc, try to use the CHILDREN API: it will save you a lot of
                    425:  * work! Remember, CHILDREN xml format: childs are either content or
                    426:  * more xml nodes but not both. Never mixed.
                    427:  *
                    428:  * \section parsing Parsing XML documents
                    429:  * 
                    430:  * We have seen what an XML document looks like. Now we are going to see how to
                    431:  * parse those documents into data structures that are usable to
                    432:  * inspect the content. All parsing functions are available at the
                    433:  * \ref axl_doc_module "Axl Doc interface".
                    434:  *
                    435:  * Let's start with a very simple example:
                    436:  *
                    437:  * \code
                    438:  * #include <axl.h>
                    439:  * #include <stdio.h>
                    440:  *
                    441:  * int main (int argc, char ** argv)
                    442:  * {
                    443:  *    axlError * error = NULL;
                    444:  *
                    445:  *    // top level definitions 
                    446:  *    axlDoc * doc = NULL;
                    447:  *
                    448:  *    // initialize axl library 
                    449:  *    if (! axl_init ()) {
                    450:  *        printf ("Unable to initialize Axl library\n");
                    451:  *       return -1;
                    452:  *    }
                    453:  *
                    454:  *    // get current doc reference 
                    455:  *    doc = axl_doc_parse_from_file ("large.xml", &error);
                    456:  *    if (doc == NULL) {
                    457:  *        axl_error_free (error);
                    458:  *        return axl_false;
                    459:  *    }
                    460:  *
                    461:  *    // DO SOME WORK WITH THE DOCUMENT HERE
                    462:  *
                    463:  *    // release the document 
                    464:  *    axl_doc_free (doc);
                    465:  *
                    466:  *    // cleanup axl library 
                    467:  *    axl_end ();
                    468:  *
                    469:  *    return axl_true;
                    470:  * }
                    471:  * \endcode
                    472:  *
                    473:  * \section iterating Traversing an XML document
                    474:  * 
                    475:  * Once the document is loaded you can use several functions to
                    476:  * traverse the document. 
                    477:  * 
                    478:  * First you must use \ref axl_doc_get_root to get the document root
                    479:  * (\ref axlNode) which contains all the information. Then, according
                    480:  * to the interface you are using, you must call to either \ref
                    481:  * axl_node_get_first_child or \ref axl_item_get_first_child.
                    482:  * 
                    483:  * Once you have access to the first element, you can use the
                    484:  * following set of functions to get more references to other nodes or
                    485:  * items:
                    486:  *
                    487:  * <ul>
                    488:  *  <li><b>MIXED API</b>: 
                    489:  *
                    490:  *    - \ref axl_item_get_first_child
                    491:  *    - \ref axl_item_get_last_child
                    492:  *    - \ref axl_item_get_next
                    493:  *    - \ref axl_item_get_previous
                    494:  *
                    495:  *  </li>
                    496:  *  <li><b>CHILDREN API</b>:
                    497:  *
                    498:  *    - \ref axl_node_get_first_child
                    499:  *    - \ref axl_node_get_last_child
                    500:  *    - \ref axl_node_get_next
                    501:  *    - \ref axl_node_get_previous
                    502:  *
                    503:  *  </li>
                    504:  * </ul>
                    505:  *
                    506:  * There are alternative APIs that will allow you to iterate the
                    507:  * document, providing a callback: \ref axl_doc_iterate. 
                    508:  *
                    509:  * Another approach is to use \ref axl_doc_get and \ref
                    510:  * axl_doc_get_content_at to get fast access to a particular node
                    511:  * using a really limited XPath syntax.
                    512:  *
                    513:  * \section modifying Modifying a loaded XML document
                    514:  *
                    515:  * One feature that comes with Axl Library is the ability to modify the
                    516:  * content, replacing it with other content and transferring nodes
                    517:  * to another place.
                    518:  * 
                    519:  * Check the following functions while operating with \ref axlNode elements:
                    520:  * 
                    521:  *   - \ref axl_node_replace
                    522:  *   - \ref axl_node_remove
                    523:  *   - \ref axl_node_transfer_childs
                    524:  *
                    525:  * Check the following functions while operating with \ref axlItem elements:
                    526:  * 
                    527:  *   - \ref axl_item_replace
                    528:  *   - \ref axl_item_remove
                    529:  *   - \ref axl_item_transfer_childs_after
                    530:  *
                    531:  * \section dumping_functions Producing xml documents from memory
                    532:  *
                    533:  * Axl Library comes with several functions to perform xml memory dump
                    534:  * operations, allowing to translate a xml representation (\ref axlDoc
                    535:  * or \ref axlNode) into a string:
                    536:  *
                    537:  * - \ref axl_doc_dump
                    538:  * - \ref axl_doc_dump_pretty
                    539:  * - \ref axl_doc_dump_to_file
                    540:  * - \ref axl_doc_dump_pretty_to_file
                    541:  *
                    542:  * In the case you want to produce xml content taking as reference a
                    543:  * particular node use:
                    544:  *  
                    545:  * - \ref axl_node_dump
                    546:  * - \ref axl_node_dump_pretty
                    547:  * - \ref axl_node_dump_to_file
                    548:  * - \ref axl_node_dump_pretty_to_file
                    549:  *
                    550:  * \section validation Validating XML documents
                    551:  *
                    552:  * Once you are familiar with the Axl API, or any other XML toolkit,
                    553:  * it turns out that it is not good practice to write a lot of source
                    554:  * code to check which node names are expected or how they are nested.
                    555:  * This makes your program fragile to changes and forces you to write
                    556:  * more code that is not actual work but a simple environment check.
                    557:  *
                    558:  * You may also need to check that some XML document received follows
                    559:  * a defined XML structure, but doing so by hand is too complex.
                    560:  *
                    561:  * For this purpose, XML 1.0 defines the DTD (Document Type Definition)
                    562:  * which allows specifying the document grammar: how nodes are nested,
                    563:  * which attributes they can contain, and whether they are allowed to
                    564:  * be empty nodes or nodes that must have other child nodes.
                    565:  *
                    566:  * Let's start with the DTD syntax used to configure restrictions about
                    567:  * node structure:
                    568:  *
                    569:  * <div class="xml-doc">
                    570:  * \code
                    571:  * <!-- sequence specification -->
                    572:  * <!ELEMENT testA (test1, test2, test3)>
                    573:  *
                    574:  * <!-- choice specification -->
                    575:  * <!ELEMENT testB (test1 | test2 | test3)>
                    576:  * \endcode
                    577:  * </div>
                    578:  *
                    579:  *
                    580:  * DTD <b>&lt;!ELEMENT</b> is modeled on top of two concepts which are
                    581:  * later expanded with repetition patterns. We will explain them
                    582:  * later. For now, these two top level concepts are: sequence and choice.
                    583:  *
                    584:  * A sequence specification (elements separated by <b>, (comma)</b>),
                    585:  * the one used to apply restrictions to the node <b>testA</b>, is used
                    586:  * to denote that <b>testA</b> has as children test1, followed by test2
                    587:  * and ended by test3. The order specified must be followed and all
                    588:  * instances must appear. This can be tweaked using repetition
                    589:  * patterns.
                    590:  *
                    591:  * On the other hand, a choice specification (elements separated by
                    592:  * <b>| (pipe)</b>) is used to specify that the content of a node is
                    593:  * built using nodes of the choice list. So, in this case, the
                    594:  * <b>testB</b> node could have either one instance of test1 or test2
                    595:  * or test3.
                    596:  *
                    597:  * Now that you know these two basic elements to model how children are
                    598:  * organized for a node, what is needed is to keep on adding more
                    599:  * &lt;!ELEMENT directives until all nodes are specified. You will end
                    600:  * your DTD document with final nodes that are either empty ones or
                    601:  * have PCDATA. At this moment MIXED nodes are not supported.
                    602:  *
                    603:  * Suppose that all nodes that are inside testA and testB are final
                    604:  * ones. Then this could be its DTD specification:
                    605:  *
                    606:  * <div class="xml-doc">
                    607:  * \code
                    608:  * <!-- test1 is a node that only has content -->
                    609:  * <!ELEMENT test1 (#PCDATA)>
                    610:  *
                    611:  * <!-- test2 is a node that is always empty -->
                    612:  * <!ELEMENT test2 EMPTY>
                    613:  *
                    614:  * <!-- test3 is a node that could have either test1 or test2 -->
                    615:  * <!ELEMENT test3 (test1 | test2)>
                    616:  * \endcode
                    617:  * </div>
                    618:  *
                    619:  * Sequences and choices could be composed to create richer DTD
                    620:  * expressions that combine sequences of choices and so on.
                    621:  * 
                    622:  * At this point all required elements to model choices, sequences and
                    623:  * final nodes are explained, but we still have to talk about repetition
                    624:  * patterns. They are symbols that are appended to elements inside
                    625:  * choices (or sequences), or to those list specifications themselves.
                    626:  *
                    627:  * Patterns available are: <b>+</b>, <b>?</b> and <b>*</b>. By
                    628:  * default, if no pattern is applied to the element, it means that the
                    629:  * match should be produced one and only one time.
                    630:  *
                    631:  * The <b>+</b> pattern is used to model elements that should be
                    632:  * matched one or more times (that is, at least once).
                    633:  *
                    634:  * The <b>*</b> pattern is used to model elements that should be
                    635:  * matched zero or more times.
                    636:  *
                    637:  * The <b>?</b> pattern is used to model elements that should be
                    638:  * matched zero or one times.
                    639:  *
                    640:  * For the example initially explained, let's suppose we want the
                    641:  * content inside <b>testA</b> to be a sequence repeated at least one
                    642:  * time, that sequence being: test1, test2 and test3. We only need to
                    643:  * add a <b>+</b> repetition pattern as follows:
                    644:  *
                    645:  * <div class="xml-doc">
                    646:  * \code
                    647:  * <!-- sequence specification -->
                    648:  * <!ELEMENT testA (test1, test2, test3)+>
                    649:  * \endcode
                    650:  * </div>
                    651:  *
                    652:  * So, we are saying to our validation engine that the sequence inside
                    653:  * testA could be found one or many times, but the entire sequence
                    654:  * must be found every time.
                    655:  *
                    656:  * Here is a simple example that loads an XML document, then loads a
                    657:  * DTD file, and then validates the XML document:
                    658:  * \code
                    659:  * bool test_12 (axlError ** error) 
                    660:  * {
                    661:  *     axlDoc * doc = NULL;
                    662:  *     axlDtd * dtd = NULL;
                    663:  *
                    664:  *     // parse gmovil file (an af-arch xml chunk) 
                    665:  *     doc = axl_doc_parse_from_file ("channel.xml", error); 
                    666:  *     if (doc == NULL) 
                    667:  *             return axl_false;
                    668:  *
                    669:  *     // parse af-arch DTD 
                    670:  *     dtd = axl_dtd_parse_from_file ("channel.dtd", error);
                    671:  *     if (dtd == NULL)
                    672:  *             return axl_false;
                    673:  *
                    674:  *     // perform DTD validation 
                    675:  *     if (! axl_dtd_validate (doc, dtd, error)) {
                    676:  *             return axl_false;
                    677:  *     }
                    678:  *
                    679:  *     // free doc reference 
                    680:  *     axl_doc_free (doc); 
                    681:  *
                    682:  *      // free dtd reference
                    683:  *      axl_dtd_free (dtd);
                    684:  * 
                    685:  *      return axl_true;
                    686:  * }
                    687:  * \endcode
                    688:  *
                    689:  * Until now, we have seen how to check xml structure. But this does not
                    690:  * cover xml node attributes. These are checked by using the
                    691:  * <b>&lt;!ATTLIST></b> declaration.
                    692:  *
                    693:  * In the case we have a node <b>testA</b> with two attributes
                    694:  * <b>attr1</b> and <b>attr2</b>, the first one optional and the second
                    695:  * one mandatory, we can declare something like:
                    696:  *
                    697:  * <div class="xml-doc">
                    698:  * \code
                    699:  * <!-- attribute validation for node testA -->
                    700:  * <!ATTLIST testA 
                    701:  *           attr1     CDATA   #IMPLIED
                    702:  *           attr2     CDATA   #REQUIRED>
                    703:  * \endcode
                    704:  * </div>
                    705:  * 
                    706:  *
                    707:  * \section xml_namespace Enabling your software with XML Namespaces
                    708:  *
                    709:  * XML 1.0 initial design didn't take care about situations where
                    710:  * several software vendors could introduce content inside the same
                    711:  * XML documents. This has several benefits, but one problem to solve:
                    712:  * <i>how to prevent xml node names (tags) from clashing with each other.</i>
                    713:  *
                    714:  * Think about using &lt;table> as a tag for your document. Many XML
                    715:  * applications use &lt;table> as a valid tag for their XML language
                    716:  * set. However, each of them has a different meaning and must be
                    717:  * handled by the proper XML software.
                    718:  *
                    719:  * While developing applications with XML, and supposing such XML
                    720:  * documents will be used by more applications than yours, you are
                    721:  * likely to be interested in using XML Namespaces. In other words, many
                    722:  * of the new XML standards that are appearing use XML Namespaces to
                    723:  * define their xml node names, while allowing users/developers
                    724:  * to use their own set of xml tags, under their own XML Namespaces,
                    725:  * so that they can all be used in the same document.
                    726:  *
                    727:  * XML Namespaces support inside Axl Library is handled through a
                    728:  * separate library, which requires the base library to function. \ref axl_install "Here are some instructions to get Axl Library Namespace installed."
                    729:  *
                    730:  * This library provides functions that replace some of the functions
                    731:  * used by XML applications that don't require XML Namespaces. In
                    732:  * particular, some of them are:
                    733:  * 
                    734:  * - \ref axl_ns_doc_validate (see this for an example)
                    735:  * - \ref axl_ns_node_cmp
                    736:  *
                    737:  * See also API documentation for all functions that are provided to
                    738:  * enable your application with XML Namespaces:
                    739:  * 
                    740:  * - \ref axl_ns_doc_module
                    741:  * - \ref axl_ns_node_module
                    742:  *
                    743:  * \section using_axl_babel Making your software support encodings other than UTF-8
                    744:  *
                    745:  * Default axl library implementation (libaxl) assumes it will receive
                    746:  * and produce UTF-8 content. 
                    747:  * 
                    748:  * The subset of characters required to properly parse XML content is
                    749:  * located in the ASCII range, which is valid UTF-8 but also valid in
                    750:  * other encodings such as ISO 646, some parts of ISO 8859, Shift-JIS,
                    751:  * EUC, or any other 7-bit, 8-bit, or mixed-width encoding which
                    752:  * ensures that the characters of ASCII have their normal positions,
                    753:  * width, and values (See section F. Autodetection of Character
                    754:  * Encodings at http://www.w3.org/TR/REC-xml/). Because of this, the
                    755:  * library properly parses the content even if it is not UTF-8.
                    756:  *
                    757:  * In many cases this is not important for you since your application
                    758:  * does not care about content encoding (such as with configuration
                    759:  * files) or the documents are already in UTF-8.
                    760:  *
                    761:  * However, this could present problems if you are handling different
                    762:  * documents with several encoding types. The idea is to have a
                    763:  * unified way to handle such differently encoded documents, with a
                    764:  * single, run-time encoding: UTF-8.
                    765:  *
                    766:  * <b>libaxl-babel</b> provides support to read content in supported
                    767:  * codifications and translate it into UTF-8 at run-time (checking
                    768:  * result to be valid UTF-8):
                    769:  *
                    770:  * \image html axl_babel_reading.png "Reading documents and handle them as they were in UTF-8"
                    771:  *
                    772:  * The library works as an extension that configures a set of handlers
                    773:  * making the library open XML documents and translate them into
                    774:  * UTF-8 if required.
                    775:  * 
                    776:  * To activate the library, you must use \ref axl_babel_init at the
                    777:  * beginning of your application or library. Here is an example:
                    778:  *
                    779:  * \code
                    780:  * // optional axlError declaration
                    781:  * axlError * error;
                    782:  * 
                    783:  * // init axl babel 
                    784:  * if (! axl_babel_init (&error)) {
                    785:  *      printf ("Failed to start axl babel: %s...\n",
                    786:  *              axl_error_get (error));
                    787:  *      axl_error_free (error);
                    788:  *      return axl_false;
                    789:  * } 
                    790:  * \endcode
                    791:  * 
                    792:  * Once done, every call to the base API (such as \ref axl_doc_parse or
                    793:  * \ref axl_doc_parse_from_file) will open the document as usual. It
                    794:  * is not required to perform any additional special operation.
                    795:  *
                    796:  * It is not required to call \ref axl_babel_finish on application
                    797:  * exit. However, in the case you want to deactivate
                    798:  * <b>libaxl-babel</b> but still keep on using axl base library, you
                    799:  * can use \ref axl_babel_finish.
                    800:  *
                    801:  * See \ref axl_babel_init for currently supported formats.
                    802:  * 
                    803:  * \section reducing_foot_print How to reduce the library footprint 
                    804:  *
                    805:  * Axl Library is implemented in a modular way to ensure you are only
                    806:  * linked against those software elements that you really
                    807:  * require. Additionally, the library allows the following to reduce
                    808:  * the library footprint to the minimum: 
                    809:  *
                    810:  * <ul>
                    811:  * <li><b>Remove log information:</b> <br> Axl library uses a console log
                    812:  * mechanism to report what's happening during processing. See \ref
                    813:  * axl_log_module "Axl Log reporting" module for more
                    814:  * information. However, under production environments this console
                    815:  * log isn't necessary, so you can safely remove it, at compile time,
                    816:  * using <b>--axl-log-disable</b> as follows:
                    817:  *
                    818:  * \code
                    819:  * >> ./configure --axl-log-disable
                    820:  * \endcode
                    821:  *
                    822:  * According to our results, the library including the log to console
                    823:  * information is about <b>366K</b>. Without log to console information the
                    824:  * library takes about <b>288K</b>.
                    825:  *
                    826:  * </li> 
                    827:  * 
                    828:  * <li><b>Remove debugging information from the library: </b> <br>You
                    829:  * can also remove debugging information from your library on
                    830:  * production environments by doing the following once the compilation process has finished:
                    831:  *
                    832:  * \code
                    833:  * >> make install-strip
                    834:  * \endcode
                    835:  *
                    836:  * According to our results, the library without log to console and
                    837:  * debugging information takes about <b>100K</b>.
                    838:  *
                    839:  * </li>
                    840:  * </ul>
                    841:  *
                    842:  * Previous information applies to the Axl base Library
                    843:  * (libaxl.so/.dll), however the same happens for the rest of software
                    844:  * components bundled with Axl.
                    845:  * 
                    846:  *
                    847:  * \section futher Further reading: where to go for more information
                    848:  * 
                    849:  * You can also check \ref axl_api "API documentation" for a complete
                    850:  * detailed explanation about the library. 
                    851:  *
                    852:  * Please, if you find that something isn't properly documented or you
                    853:  * think that something could be improved, contact us in the mailing
                    854:  * list. We are building Axl Library with the aim to produce a high
                    855:  * quality, commercial grade, open source XML development kit, so, any
                    856:  * help received will be welcome.
                    857:  *
                    858:  * Remember you can always contact us at the mailing list for any
                    859:  * question not properly answered by this documentation. See <a
                    860:  * href="http://www.aspl.es/axl/doc.html">Axl Library website
                    861:  * documentation</a> to get more
                    862:  * information about mailing list.
                    863:  *
                    864:  */
                    865: 
                    866: /** 
                    867:  * \page axl_api LibAxl API documentation
                    868:  *
                    869:  * Here is the API for the modules defined inside the library:
                    870:  * 
                    871:  * <ul>
                    872:  *  <li><b>Basic API to interact with XML documents (base library libaxl): </b>
                    873:  * 
                    874:  *    - \ref axl_module
                    875:  *    - \ref axl_doc_module
                    876:  *    - \ref axl_node_module
                    877:  *    - \ref axl_node_attribute_cursor
                    878:  *    - \ref axl_node_annotate
                    879:  *    - \ref axl_item_module
                    880:  *    - \ref axl_dtd_module
                    881:  *    - \ref axl_decl_module
                    882:  *    - \ref axl_handlers
                    883:  *
                    884:  *  </li>
                    885:  *  <li><b>XML Namespaces API (required additional library libaxl-ns): </b>
                    886:  *
                    887:  *    - \ref axl_ns_doc_module
                    888:  *    - \ref axl_ns_node_module
                    889:  *
                    890:  *  </li>
                    891:  *  <li><b>Axl BABEL API (required to support additional encoding formats): </b>
                    892:  *
                    893:  *    - \ref axl_babel
                    894:  *
                    895:  *  </li>
                    896:  *  <li><b>Error reporting and debugging functions (base library libaxl): </b>
                    897:  * 
                    898:  *    - \ref axl_error_module
                    899:  *    - \ref axl_log_module
                    900:  *  </li>
                    901:  *  <li><b>Auxiliary modules, supporting data types, string handling, etc (base library libaxl):</b>
                    902:  *
                    903:  * - \ref axl_stream_module
                    904:  * - \ref axl_list_module
                    905:  * - \ref axl_list_cursor_module
                    906:  * - \ref axl_stack_module
                    907:  * - \ref axl_binary_stack_module
                    908:  * - \ref axl_hash_module
                    909:  * - \ref axl_hash_cursor_module
                    910:  * - \ref axl_string_module 
                    911:  *
                    912:  *  </li>
                    913:  * </ul>
                    914:  * 
                    915:  */
                    916: 
                    917: /** 
                    918:  * @page axl_install Installing Axl library and using it
                    919:  *
                    920:  * \section intro Introduction
                    921:  *
                    922:  * Axl library is an XML library written in ANSI C, which is known to
                    923:  * work on Microsoft Windows, GNU/Linux, *-BSD, GNU/Linux under
                    924:  * Amd64/Itanium and MacOS/X platforms. 
                    925:  *
                    926:  * The library package is composed by the following items:
                    927:  * 
                    928:  * \image html axl-components.png "Axl library components"
                    929:  * 
                    930:  * - <b>libaxl</b>: base library implementing all XML functions, and
                    931:  * common API used by extension libraries. It has no external
                    932:  * dependencies.
                    933:  *
                    934:  * - <b>libaxl-ns</b>: optional library, built on top of libaxl, which
                    935:  * provides Namespaces support. It depends on libaxl.
                    936:  *
                    937:  * - <b>libaxl-babel</b>: optional library, built on top of libaxl,
                    938:  * which provides support for additional encodings beyond the default UTF-8.
                    939:  *
                    940:  * - <b>axl-knife</b>: command line tool, built on top of libaxl and
                    941:  * libaxl-ns. It depends on the base library and the namespace
                    942:  * library.
                    943:  *
                    944:  * Here are a set of instructions to get the library compiled for your
                    945:  * platform:
                    946:  *
                    947:  * \section axl_on_unix GNU/Linux (or any posix OS) installation instructions
                    948:  *
                    949:  * First, download the package from the download section. Check <a
                    950:  * href="http://www.aspl.es/axl/doc.html">this section to know more
                    951:  * about this.</a>
                    952:  *
                    953:  * Then use the standard procedure to compile an autotools-ready
                    954:  * package. Here are the steps:
                    955:  *
                    956:  * \code
                    957:  *  >> tar xzvf axl-0.2.1.b1984.g1985.tar.gz
                    958:  *  >> cd axl-0.2.1.b1984.g1985/
                    959:  *  >> ./configure
                    960:  *  >> make
                    961:  *  >> make install
                    962:  * \endcode
                    963:  * 
                    964:  * \section checking_axl Checking Axl Library compilation (regression test)
                    965:  *
                    966:  * Once finished, to check that your axl build works properly, get inside
                    967:  * the test/ directory and run the axl regression test (test_01):
                    968:  * \code
                    969:  *  >> cd test/
                    970:  *  >> ./test_01
                    971:  * \endcode
                    972:  * 
                    973:  * All tests must execute properly to ensure the library is fully functional. 
                    974:  *
                    975:  * \section windows Windows installation instructions
                    976:  *
                    977:  * Axl library comes with packages already built for Microsoft Windows platforms.
                    978:  * Check <a href="http://www.aspl.es/axl/doc.html">the download section to get them</a>.
                    979:  *
                    980:  * \section using Using LibAxl library (installations with pkg-config infrastructure)
                    981:  *
                    982:  * Axl Library is composed by a base library, which provides all XML
                    983:  * 1.0 functions. Additionally, a separate library is provided for
                    984:  * Namespace functions. 
                    985:  * 
                    986:  * Axl library comes with pkg-config support, which makes it easy to use
                    987:  * inside your pkg-config enabled projects.
                    988:  *
                    989:  * To check current support for your Axl Library you can execute:
                    990:  * 
                    991:  * \code
                    992:  *  >> pkg-config --libs --cflags axl
                    993:  * \endcode
                    994:  *
                    995:  * To give support for XML Namespaces (libaxl-ns), you must use:
                    996:  * \code
                    997:  *  >> pkg-config --libs --cflags axl-ns
                    998:  * \endcode
                    999:  *
                   1000:  * To enable extended encoding support (libaxl-babel), you must use:
                   1001:  * \code
                   1002:  *  >> pkg-config --libs --cflags axl-babel
                   1003:  * \endcode
                   1004:  *
                   1005:  * To include support into your autotool checks (configure.ac/in
                   1006:  * files) you can place the following piece:
                   1007:  * \code
                   1008:  * PKG_CHECK_MODULES(LIBRARIES, axl) 
                   1009:  * \endcode
                   1010:  *
                   1011:  * Again, add <b>axl-ns</b> or <b>axl-babel</b> to the previous
                   1012:  * instruction if you want your package to also check for Axl Library
                   1013:  * Namespace or Babel support.
                   1014:  * 
                   1015:  * \section including_headers Including Axl Library headers
                   1016:  *
                   1017:  * For all platforms, Axl Library base headers have to be included as follows:
                   1018:  * \code
                   1019:  * #include <axl.h>
                   1020:  * \endcode
                   1021:  *
                   1022:  * In the case Namespace is required, you must use: 
                   1023:  * \code
                   1024:  * #include <axl_ns.h>
                   1025:  * \endcode
                   1026:  *
                   1027:  * To include babel support, you must use: 
                   1028:  * \code
                   1029:  * #include <axl_babel.h>
                   1030:  * \endcode
                   1031:  */
                   1032: 
                   1033: 
                   1034: /**
                   1035:  * \defgroup axl_module Axl main: Init functions for the library
                   1036:  */
                   1037: 
                   1038: /** 
                   1039:  * \addtogroup axl_module
                   1040:  * @{
                   1041:  */
                   1042: 
                   1043: /** 
                   1044:  * @brief Initializes Axl library.
                   1045:  *
                   1046:  * Currently this function is not required to be called because the
                   1047:  * libaxl implementation has nothing to initialize. The implementation
                   1048:  * will try to keep it this way, avoiding internal variable initialization.
                   1049:  *
                   1050:  * However, the API is provided for future usage.
                   1051:  * 
                   1052:  * @return The function returns \ref axl_true if the library was
                   1053:  * properly initialized or \ref axl_false if something fails.
                   1054:  */
                   1055: axl_bool axl_init (void)
                   1056: {
                   1057:        /* nothing to initialize: always report success */
                   1058:        return axl_true;
                   1059: }
                   1060: 
                   1061: /** 
                   1062:  * @brief Allows to terminate libaxl library function.
                   1063:  *
                   1064:  * Currently this function is not required because nothing is done to
                   1065:  * stop libaxl function. The implementation will try to keep this;
                   1066:  * however, in the future calling it could be required.
                   1067:  */
                   1068: void axl_end (void)
                   1069: {
                   1070:        /* nothing to release: the body performs no work */
                   1071:        return;
                   1072: }
                   1073: 
                   1074: /* @} */

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>