Annotation of gpl/axl/src/axl.c, revision 1.1.1.1
1.1 misho 1: /*
2: * @internal
3: *
4: * LibAxl: Another XML library
5: * Copyright (C) 2006 Advanced Software Production Line, S.L.
6: *
7: * This program is free software; you can redistribute it and/or
8: * modify it under the terms of the GNU Lesser General Public License
9: * as published by the Free Software Foundation; either version 2.1 of
10: * the License, or (at your option) any later version.
11: *
12: * This program is distributed in the hope that it will be useful,
13: * but WITHOUT ANY WARRANTY; without even the implied warranty of
14: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15: * GNU Lesser General Public License for more details.
16: *
17: * You should have received a copy of the GNU Lesser General Public
18: * License along with this program; if not, write to the Free
19: * Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
20: * 02111-1307 USA
21: *
22: * You may find a copy of the license under this software is released
23: * at COPYING file. This is LGPL software: you are welcome to
24: * develop proprietary applications using this library without any
25: * royalty or fee but returning back any change, improvement or
26: * addition in the form of source code, project image, documentation
27: * patches, etc.
28: *
29: * For commercial support on build XML enabled solutions contact us:
30: *
31: * Postal address:
32: * Advanced Software Production Line, S.L.
33: * Edificio Alius A, Oficina 102,
34: * C/ Antonio Suarez Nº 10,
35: * Alcalá de Henares 28802 Madrid
36: * Spain
37: *
38: * Email address:
39: * info@aspl.es - http://www.aspl.es/xml
40: */
41: #include <axl.h>
42:
43: /**
44: * \mainpage AXL: Another XML Library implementation (XML 1.0 standard)
45: *
46: * \section intro Introduction
47: *
48: * AXL is a library which aims to implement the XML 1.0 standard, as
49: * defined at the XML 1.0 third edition
50: * recommendation found at: http://www.w3.org/TR/REC-xml/.
51: *
52: * It was implemented to support XML requirements inside projects
53: * developed by <a href="http://www.aspl.es">Advanced Software
54: * Production Line, S.L.</a>, especially <a
55: * href="http://fact.aspl.es">Af-Arch</a> and <a
56: * href="http://vortex.aspl.es">Vortex Library</a>, which are already
57: * using the library successfully in production environments.
58: *
59: * Some features this library has are:
60: *
61: * <ul>
62: *
63: * <li><b>A clean implementation</b>, that only includes, those elements
64: * defined, and only those, inside the XML 1.0 standard, as defined in <a href="http://www.w3.org/TR/REC-xml/">the third edition</a>.</li>
65: *
66: * <li><b>Fast and memory efficient</b> implementation. If you still
67: * think that XML is slow and memory inefficient, you haven't tried Axl
68: * Library. You'll find reports about its performance and memory usage at: http://www.aspl.es/xml/doc.html </li>
69: *
70: * <li>Ensure that the library is implemented using abstract data types,
71: * commonly known as opaque types, to avoid exposing details to the
72: * user space code that is consuming the API.
73: * This will allow us to improve the library without breaking existing
74: * code and, the most important, to not be limited by details already
75: * exposed.
76: * In fact, this design has already been used to improve the library
77: * performance greatly. See reports found at: http://www.aspl.es/xml/doc.html.</li>
78: *
79: * <li><b>Small and efficient</b>, ensuring on every stable release that
80: * the library does not leak, not only while using the library in a
81: * proper manner but also when errors are found. </li>
82: *
83: * <li>Have a <b>modular</b> design that allows you to use only those
84: * elements required by your software. At this moment a base library
85: * (which implements XML 1.0) is provided, along with optional libraries
86: * that provide additional features to support XML Namespaces 1.0 and
87: * extended encodings.</li>
88: *
89: * <li>To have an <b>small footprint</b> is a requirement. Currently LibAxl (118K) and LibAxl-Ns (9K)</li>
90: *
91: * <li><b>Bindings</b> for other languages. At this moment it is officially supported <b>PyAxl</b>, a binding for the Python Language.</li>
92: * </ul>
93: *
94: * \section features_and_status What is the status of the library
95: *
96: * Currently the library is stable and it is known to work under
97: * GNU/Linux and Windows with a really good performance. See reports found at http://www.aspl.es/xml/doc.html to know more about this.
98: *
99: * The library already covers 95% of the common requirements of XML
100: * development. Among others, it supports:
101: *
102: * - XML tree parsing, from memory and files, allowing a great level
103: * of detail while accessing to the data (comments, process
104: * instructions, xml nodes, and content).
105: *
106: * - Mostly completed DTD validation support, including <!ATTLIST>, <!ENTITY>
107: * and <!ELEMENT> elements. Remains to implement NOTATION declarations
108: * and full entity replacement.
109: *
110: * - Two modes to inspect the xml documents at the same time,
111: * <b>MIXED API</b>: an API to traverse the document allowing access
112: * to all items found (\ref axlItem) inside the document (\ref
113: * axlDoc) root node (\ref axlNode), and <b>CHILDREN API</b>: an API
114: * that allows to traverse the node using as reference only the nodes
115: * (\ref axlNode) inside the document (\ref axlDoc).
116: *
117: * - <a href="http://www.w3.org/TR/REC-xml-names/">XML 1.0
118: * Namespaces</a> full support, through the additional component
119: * (<b>libaxl-ns</b>), allowing to produce xml applications that are
120: * XML Namespace aware.
121: *
122: * - Support for extended encodings (<b>libaxl-babel</b>), beyond default encoding utf-8.
123: *
124: * \section documentation Library Documentation
125: *
126: * The library documentation is composed into two pieces. The Axl
127: * manual and the API documentation.
128: *
129: * - \ref axl_install
130: * - \ref axl_manual
131: * - \ref axl_api
132: * - \ref axl_knife_manual
133: *
134: * \section contact_us Contact us
135: *
136: * If you find something not properly documented, or some question is
137: * not answered on this documentation, check the <a href="http://www.aspl.es/axl/doc.html">mailing list</a>.
138: *
139: * You can also contact us if you have patches, improvements or
140: * suggestions at the mailing list.
141: */
142:
143: /**
144: * \page axl_manual XML development with Axl Library
145: *
146: * \section Manual Index
147: *
148: * On this manual you'll find the following section:
149: *
150: * <b>Section 1: Basic elements to understand XML and Axl </b><br>
151: *
152: * - \ref intro
153: * - \ref concepts
154: * - \ref two_apis
155: *
156: * <b>Section 2: Manipulating and producing XML documents </b><br>
157: *
158: * - \ref parsing
159: * - \ref iterating
160: * - \ref modifying
161: * - \ref dumping_functions
162: *
163: * <b>Section 3: Doing validation on your documents </b><br>
164: *
165: * - \ref validation
166: * - \ref xml_namespace
167: * - \ref using_axl_babel
168: *
169: * <b>Section 4: Advanced topics </b><br>
170: *
171: * - \ref reducing_foot_print
172: *
173: * <b>Appendix</b><br>
174: *
175: * - \ref futher
176: *
177: *
178: * \section intro Introduction: XML development
179: *
180: * XML 1.0 definition allows to build documents that could be used to
181: * represents textual information, remote procedure invocations or
182: * dynamic user interfaces. Its definition is based on very simple
183: * principles, that allows to developers to compose them to create
184: * bigger abstractions that are roughly on every place in modern
185: * computer software design.
186: *
187: * It is a "quite" human readable format, so you will find that is not
188: * the best format if you are looking for space efficiency. What XML
189: * 1.0 provides you on the other hand is the ability to quickly
190: * prototype and produce working formats that encapsulate your data,
191: * and, as your system evolves, XML 1.0 will do it with you.
192: *
193: * Among other things, XML 1.0 provides you ways to validate your
194: * documents to ensure your code will read XML documents in the format
195: * expected, reducing the time and development cost due to additional
196: * checkings required.
197: *
198: * Before continuing, we will explain some concepts that are required
199: * to understand XML 1.0 and why the Axl API was built this way.
200: *
201: * \section concepts Some concepts before starting to use Axl Library
202: *
203: * Here is a simple example of a XML 1.0 document:
204: * <div class="xml-doc">
205: * \code
206: * <?xml version="1.0" ?>
207: * <!-- This is a comment -->
208: * <complex>
209: * <data>
210: * <simple>10</simple>
211: * <empty attr1="value1" />
212: * </data>
213: * </complex>
214: * \endcode
215: * </div>
216: *
217: * Previous XML document represents an structure with a top level
218: * node, called <b>complex</b>, that has one single child called
219: * <b>data</b> which in turn have two childs. The first one is the
220: * child called <b>simple</b> that have content and other one, called
221: * <b>empty</b>, which is a node usually called an empty xml node.
222: *
223: * The XML representation for previous document is the following:
224: * \image html image01.png "Document representation"
225: *
226: * Several issues must be considered while interpreting previous
227: * diagram and how Axl library parse and expose those elements through
228: * the API to the client application:
229: * <ul>
230: *
231: * <li>Every XML document have a root node (\ref axl_doc_get_root). Without exception. In this
232: * case, the root node for our example is <b>complex</b>. </li>
233: *
234: * <li>If a node have content, that content is not represented with
235: * another node. That content is associated to the node and could be
236: * retrieved using several function (\ref axl_node_get_content, \ref
237: * axl_node_get_content_copy and \ref
238: * axl_node_get_content_trans).
239: *
240: * Alternatively, while using the MIXED API, you can traverse child
241: * items stored for a particular node, detecting those items that are
242: * \ref ITEM_CONTENT or \ref ITEM_CDATA (using \ref
243: * axl_item_get_type). </li>
244: *
245: * <li>Having a node (\ref axlNode) with content doesn't mean to have a node with
246: * childs. The child notion is only about having more xml nodes (\ref axlNode) as
247: * childs.
248: *
249: * This is particularly important if you take into consideration that a
250: * node could have content (\ref ITEM_CONTENT), comments (\ref
251: * ITEM_COMMENT), application process instructions (\ref ITEM_PI),
252: * CDATA content (uninterpreted content \ref ITEM_CDATA), all of them
253: * mixed with more xml nodes (\ref ITEM_NODE). </li>
254: *
255: * <li>A final node which is empty because it doesn't have content or
256: * childs, is usually referred to as <b>EMPTY</b> type node. A final
257: * node with content but no childs is usually referred to as
258: * <b>PCDATA</b>. A node that have content mixed with references to
259: * more child xml nodes is referred to as <b>MIXED</b>.</li>
260: *
261: * <li>At the <b>empty</b> node, you'll find that it has an attribute
262: * called <b>attr1</b> with a value <b>value1</b>. A node could have
263: * any number of attributes but, it should be named
264: * differently. Again, if a node is empty, it keeps empty even if it
265: * has attributes.
266: * </li></ul>
267: *
268: * So, to summarize, we have a root node, that could contain more
269: * nodes, that could contain PCDATA, or content, and those nodes could
270: * contain named attributes with values.
271: *
272: * \section two_apis MIXED and CHILDREN API: How to use them
273: *
274: * XML 1.0 is used for a variety of purposes, some of them requires
275: * the CHILDREN API and the rest the MIXED API. To <i>require</i>, we
276: * mean that it fits better, so you will get better results, your
277: * application will react in a proper manner and you'll have to do
278: * less work.
279: *
280: * The reason for this API is simple. XML 1.0 definition allows to mix
281: * content with more nodes, comments and many more elements to be
282: * placed as childs for a particular node.
283: *
284: * This definition, found at the standard, have moved many XML
285: * implementations to support only an API that support all these
286: * features, that is, an interface that is complicated and overloaded,
287: * that gives you a power that you don't require, making your
288: * development more inefficient.
289: *
290: * As a result, many developers only require a common form of xml,
291: * called CHILDREN, in which nodes have either child nodes or content,
292: * but not both at the same time. This kind of xml is
293: * really useful, easy to parse, easy to make a DTD definition, more
294: * compact and extensible.
295: *
296: * Lets see an example for both formats to clarify:
297: *
298: * <div class="xml-doc">
299: * \code
300: * <?xml version='1.0' ?>
301: * <document>
302: * <!-- Children XML format example: as you can see -->
303: * <!-- nodes only contains either nodes or node content -->
304: * <!-- but nothing mixed at the same level -->
305: * <node1>
306: * This is node1 content
307: * </node1>
308: * <node2>
309: * <node3>
310: * This is node3 content
311: * </node3>
312: * <node4 />
313: * </node2>
314: * </document>
315: * \endcode
316: * </div>
317: *
318: * While an MIXED xml document could be:
319: *
320: * <div class="xml-doc">
321: * \code
322: * <?xml version='1.0' ?>
323: * <document>
324: * <!-- Children XML format example: as you can see -->
325: * <!-- nodes only contains either nodes or node content -->
326: * <!-- but nothing mixed at the same level -->
327: * <node1>
328: * This is node1 content
329: * </node1>
330: * Content mixed with xml nodes at the same level.
331: * <node2>
332: * More content....
333: * <node3>
334: * This is node3 content
335: * </node3>
336: * <node4 />
337: * </node2>
338: * </document>
339: * \endcode
340: * </div>
341: *
342: * Both approaches, which are valid using the XML 1.0 standard, are
343: * appropriate for particular situations:
344: *
345: * - CHILDREN API: compact representations, configuration files, rpc
346: * invocation description, graphical user interface definition.
347: * - MIXED API: textual description, for example: XSL-FO.
348: *
349: * Having introduced the context of the problem, Axl Library takes no
350: * position, providing an API that fits while developing xml content
351: * that follows a CHILDREN description and an API for the MIXED
352: * description.
353: *
354: * In this context, which API you use, will only affect to the way you
355: * traverse the document. The CHILDREN API is mainly provided by the
356: * \ref axl_node_module "Axl Node interface" and the MIXED API is
357: * mainly provided by the \ref axl_item_module "Axl Item interface".
358: *
359: * You don't need to do any especial operation to activate both APIs,
360: * both are provided at the same time. Lets see an example:
361: *
362: * Supposing the previous mixed example, the following code will get
363: * access to the <node2> reference:
364: * \code
365: * // supposing "doc" reference contains the document loaded
366: * axlNode * node;
367: *
368: * // get the document root, that is <document>
369: * node = axl_doc_get_root (doc);
370: *
371: * // get the first child for the document root (<node1>)
372: * node = axl_node_get_first_child (node);
373: *
374: * // get the next child (brother of <node1>, that is <node2>)
375: * node = axl_node_get_next (node);
376: * \endcode
377: *
378: * However, with the MIXED API you can get every detail, every item
379: * found for a particular node. This is how:
380: *
381: * \code
382: * // supposing "doc" reference contains the document loaded
383: * axlNode * node;
384: * axlItem * item;
385: *
386: * // get the document root, that is <document>
387: * node = axl_doc_get_root (doc);
388: *
389: * // get the first item child for the document root that is the comment:
390: * // "Children XML format example: as you can see".
391: * item = axl_item_get_first_child (node);
392: *
393: * // now skip the following two comments
394: * item = axl_item_get_next (item);
395: * item = axl_item_get_next (item);
396: *
397: * // now the next item is holding the <node1>
398: * item = axl_item_get_next (item);
399: * node = axl_item_get_data (item);
400: *
401: * // now get the content between the <node1> and <node2>
402: * item = axl_item_get_next (item);
403: *
404: * // and finally, get the next child (brother of <node1>, that is
405: * // <node2>)
406: * item = axl_item_get_next (item);
407: * node = axl_item_get_data (item);
408: * \endcode
409: *
410: * Obviously, the mixed example contains more code and it is more
411: * fragile to xml document changes. The problem is that the MIXED API
412: * is more general than the CHILDREN, making XML libraries to only
413: * provide that API.
414: *
415: * As a consequence:
416: *
417: * - You only need to use the MIXED API (\ref axlItem) if you are
418: * going to do an xml application that allows having content mixed
419: * with nodes, comments, etc, and you want to get access to such
420: * content.
421: *
422: * - If you are planing to develop an XML solution that represents
423: * information (user interfaces), module descriptions, configuration
424: * files, etc, try to use the CHILDREN API: it will save you lot of
425: * work! Remember, CHILDREN xml format: childs are either content or
426: * more xml nodes but not both. Never mixed.
427: *
428: * \section parsing Parsing XML documents
429: *
430: * We have seen how an XML document is. Now we are going to see how to
431: * parse those document into data structures that are usable to
432: * inspect the content. All parsing functions are available at the
433: * \ref axl_doc_module "Axl Doc interface".
434: *
435: * Let's start with a very simple example:
436: *
437: * \code
438: * #include <axl.h>
439: * #include <stdio.h>
440: *
441: * int main (int argc, char ** argv)
442: * {
443: * axlError * error;
444: *
445: * // top level definitions
446: * axlDoc * doc = NULL;
447: *
448: * // initialize axl library
449: * if (! axl_init ()) {
450: * printf ("Unable to initialize Axl library\n");
451: * return -1;
452: * }
453: *
454: * // get current doc reference
455: * doc = axl_doc_parse_from_file ("large.xml", &error);
456: * if (doc == NULL) {
457: * axl_error_free (error);
458: * return axl_false;
459: * }
460: *
461: * // DO SOME WORK WITH THE DOCUMENT HERE
462: *
463: * // release the document
464: * axl_doc_free (doc);
465: *
466: * // cleanup axl library
467: * axl_end ();
468: *
469: * return axl_true;
470: * }
471: * \endcode
472: *
473: * \section iterating Traveling an XML document
474: *
475: * Once the document is loaded you can use several function to
476: * traverse the document.
477: *
478: * First you must use \ref axl_doc_get_root to get the document root
479: * (\ref axlNode) which contains all the information. Then, according
480: * to the interface you are using, you must call to either \ref
481: * axl_node_get_first_child or \ref axl_item_get_first_child.
482: *
483: * Once you have access to the first element, you can use the
484: * following set of function to get more references to other nodes or
485: * items:
486: *
487: * <ul>
488: * <li><b>MIXED API</b>:
489: *
490: * - \ref axl_item_get_first_child
491: * - \ref axl_item_get_last_child
492: * - \ref axl_item_get_next
493: * - \ref axl_item_get_previous
494: *
495: * </li>
496: * <li><b>CHILDREN API</b>:
497: *
498: * - \ref axl_node_get_first_child
499: * - \ref axl_node_get_last_child
500: * - \ref axl_node_get_next
501: * - \ref axl_node_get_previous
502: *
503: * </li>
504: * </ul>
505: *
506: * There are alternative APIs that will allow you to iterate the
507: * document, providing a callback: \ref axl_doc_iterate.
508: *
509: * Another approach is to use \ref axl_doc_get and \ref
510: * axl_doc_get_content_at to get fast access to a particular node
511: * using a really limited XPath syntax.
512: *
513: * \section modifying Modifying a loaded XML document
514: *
515: * One feature that comes with Axl Library is the ability to modify the
516: * content, replacing it with other content and transferring nodes
517: * to another place.
518: *
519: * Check the following function while operating with \ref axlNode elements:
520: *
521: * - \ref axl_node_replace
522: * - \ref axl_node_remove
523: * - \ref axl_node_transfer_childs
524: *
525: * Check the following functions while operating with \ref axlItem elements:
526: *
527: * - \ref axl_item_replace
528: * - \ref axl_item_remove
529: * - \ref axl_item_transfer_childs_after
530: *
531: * \section dumping_functions Producing xml documents from memory
532: *
533: * Axl Library comes with several functions to perform xml memory dump
534: * operations, allowing to translate a xml representation (\ref axlDoc
535: * or \ref axlNode) into a string:
536: *
537: * - \ref axl_doc_dump
538: * - \ref axl_doc_dump_pretty
539: * - \ref axl_doc_dump_to_file
540: * - \ref axl_doc_dump_pretty_to_file
541: *
542: * In the case you want to produce xml content taking as reference a
543: * particular node use:
544: *
545: * - \ref axl_node_dump
546: * - \ref axl_node_dump_pretty
547: * - \ref axl_node_dump_to_file
548: * - \ref axl_node_dump_pretty_to_file
549: *
550: * \section validation Validating XML documents
551: *
552: * Once you are familiar with the Axl API, or any other XML toolkit,
553: * it turns out that it is not a good practice to write lots of source code
554: * to check the node names expected or how they are nested. This makes
555: * your program really fragile to changes and makes you write more
556: * code that is not actual work but a simple environment check.
557: *
558: * You may also need to check that some XML document received follows
559: * a defined XML structure, but it is too complex to be done.
560: *
561: * For this purpose, XML 1.0 defines DTD (Document Type Definition),
562: * which allows to specify the document grammar: how nodes are
563: * nested, which attributes they could contain, and whether they are
564: * allowed to be empty nodes or must have other child nodes.
565: *
566: * Let start with the DTD syntax used to configure restrictions about
567: * node structure:
568: *
569: * <div class="xml-doc">
570: * \code
571: * <!-- sequence specification -->
572: * <!ELEMENT testA (test1, test2, test3)>
573: *
574: * <!-- choice specification -->
575: * <!ELEMENT testB (test1 | test2 | test3)>
576: * \endcode
577: * </div>
578: *
579: *
580: * DTD <b><!ELEMENT</b> is modeled on top of two concepts which are
581: * later expanded with repetition patterns. We will explain them
582: * later. For now, these two top level concepts are: sequence and choice.
583: *
584: * Sequence specification (elements separated by <b>, (comma)</b>), the
585: * one used to apply restriction to the node <b>testA</b>, is used to
586: * denote that <b>testA</b> has as childs test1, followed by test2
587: * and ended by test3. The order specified must be followed and all
588: * instances must appear. This could be tweaked using repetition
589: * patterns.
590: *
591: * On the other hand, choice specification (elements separated by
592: * <b>| (pipe)</b>) is used to specify that the content of a node is
593: * built using nodes of the choice list. So, in this case,
594: * <b>testB</b> node could have either one instance of test1 or test2
595: * or test3.
596: *
597: * Now that you know these two basic elements to model how childs are
598: * organized for a node, what is needed is to keep on adding more
599: * <!ELEMENT directives until all nodes are specified. You will end
600: * your DTD document with final nodes that are either empty ones or
601: * have PCDATA. At this moment MIXED nodes are not supported.
602: *
603: * Suppose that all nodes that are inside testA and testB are final
604: * ones. Then this could be its DTD specification:
605: *
606: * <div class="xml-doc">
607: * \code
608: * <!-- test1 is a node that only have content -->
609: * <!ELEMENT test1 (#PCDATA)>
610: *
611: * <!-- test2 is a node that is always empty -->
612: * <!ELEMENT test2 EMPTY>
613: *
614: * <!-- test3 is a node that could have either test1 or test2 -->
615: * <!ELEMENT test3 (test1 | test2)>
616: * \endcode
617: * </div>
618: *
619: * Sequences and choices could be composed to create richer DTD
620: * expressions that combines sequences of choices and so on.
621: *
622: * At this point all required elements to model choices, sequences and
623: * final nodes are explained, but, we have to talk about repetition
624: * pattern. They are symbols that are appended to elements inside
625: * choices (or sequences) including those list specifications.
626: *
627: * Patterns available are: <b>+</b>, <b>?</b> and <b>*</b>. By
628: * default, if no pattern is applied to the element, it means that the
629: * match should be produced one and only one time.
630: *
631: * The <b>+</b> pattern is used to model elements that should be
632: * matched one or more times (that is, at least once).
633: *
634: * The <b>*</b> pattern is used to model elements that should be
635: * matched zero or any times.
636: *
637: * The <b>?</b> pattern is used to model elements that should be
638: * matched zero or one times.
639: *
640: * For the example initially explained, let's suppose we want that
641: * the content inside <b>testA</b> have sequences repeated at least one
642: * time, being that sequence: test1, test2 and test3. We only need to
643: * add a <b>+</b> repetition pattern as follows:
644: *
645: * <div class="xml-doc">
646: * \code
647: * <!-- sequence specification -->
648: * <!ELEMENT testA (test1, test2, test3)+>
649: * \endcode
650: * </div>
651: *
652: * So, we are saying to our validation engine that the sequence inside
653: * testA could be found one or many times, but the entire sequence
654: * must be found every time.
655: *
656: * Here is an simple example that loads an XML document, then loads an
657: * DTD file, and then validates the XML document:
658: * \code
659: * bool test_12 (axlError ** error)
660: * {
661: * axlDoc * doc = NULL;
662: * axlDtd * dtd = NULL;
663: *
664: * // parse gmovil file (an af-arch xml chunk)
665: * doc = axl_doc_parse_from_file ("channel.xml", error);
666: * if (doc == NULL)
667: * return axl_false;
668: *
669: * // parse af-arch DTD
670: * dtd = axl_dtd_parse_from_file ("channel.dtd", error);
671: * if (dtd == NULL)
672: * return axl_false;
673: *
674: * // perform DTD validation
675: * if (! axl_dtd_validate (doc, dtd, error)) {
676: * return axl_false;
677: * }
678: *
679: * // free doc reference
680: * axl_doc_free (doc);
681: *
682: * // free dtd reference
683: * axl_dtd_free (dtd);
684: *
685: * return axl_true;
686: * }
687: * \endcode
688: *
689: * Until now, we have seen how to check xml structure. But this do not
690: * cover xml node attributes. This is checked by using
691: * <b><!ATTLIST></b> declaration.
692: *
693: * In the case we have a node <b>testA</b> with two attributes
694: * <b>attr1</b> and <b>attr2</b> the first one optional and the second
695: * one mandatory, we can declare something like:
696: *
697: * <div class="xml-doc">
698: * \code
699: * <!-- attribute validation for node testA -->
700: * <!ATTLIST testA
701: * attr1 CDATA #IMPLIED
702: * attr2 CDATA #REQUIRED>
703: * \endcode
704: * </div>
705: *
706: *
707: * \section xml_namespace Enabling your software with XML Namespaces
708: *
709: * XML 1.0 initial design didn't take care about situations where
710: * several software vendors could introduce content inside the same
711: * XML documents. This has several benefits, but one problem to solve:
712: * <i>how to avoid xml node names (tags) to clash from each other.</i>
713: *
714: * Think about using <table> as a tag for your document. Many XML
715: * applications uses <table> as a valid tag for its XML language
716: * set. However, each of them has a different meaning and must be
717: * handled by the proper XML software.
718: *
719: * While developing applications with XML, and supposing such XML
720: * documents will be used by more applications than yours, you are
721: * likely to be interested in using XML Namespaces. In other words, many
722: * of the new XML standards that are appearing uses XML Namespaces to
723: * allow defining its xml node names, while allowing users/developers
724: * to use their own set of xml tags, under their own XML Namespaces,
725: * in order they can use them in the same document.
726: *
727: * XML Namespaces support inside Axl Library is handled through a
728: * separated library, which requires the base library to function. \ref axl_install "Here are some instructions to get Axl Library Namespace installed."
729: *
730: * This library provides functions that replaces some of the functions
731: * used by XML applications that don't require XML Namespaces. In
732: * particular, some of them are:
733: *
734: * - \ref axl_ns_doc_validate (see this for an example)
735: * - \ref axl_ns_node_cmp
736: *
737: * See also API documentation for all functions that are provided to
738: * enable your application with XML Namespaces:
739: *
740: * - \ref axl_ns_doc_module
741: * - \ref axl_ns_node_module
742: *
743: * \section using_axl_babel Making your software to support other encodings than UTF-8
744: *
745: * Default axl library implementation (libaxl) assumes it will receive
746: * and produce UTF-8 content.
747: *
748: * Because the subset of characters that are used to properly parse
749: * XML content are located in the ASCII range, still valid UTF-8, but
750: * at same time valid in other encodings such ISO 646, some part of
751: * ISO 8859, Shift-JIS, EUC, or any other 7-bit, 8-bit, or mixed-width
752: * encoding which ensures that the characters of ASCII have their
753: * normal positions, width, and values (See section F. Autodetecting
754: * of Character Encodings at http://www.w3.org/TR/REC-xml/), causes
755: * the library to properly parse the content, even if it is not UTF-8.
756: *
757: * In many cases this is not important for you since your application
758: * do not care about content codification (such configuration files)
759: * or they are in UTF-8.
760: *
761: * However, this could present problems if you are handling different
762: * documents with several encoding types. The idea is to have an
763: * unified way to handle such different encoded documents, with a
764: * single, run-time encoding: UTF-8.
765: *
766: * <b>libaxl-babel</b> provides support to read content in supported
767: * codifications and translate it into UTF-8 at run-time (checking
768: * result to be valid UTF-8):
769: *
770: * \image html axl_babel_reading.png "Reading documents and handle them as they were in UTF-8"
771: *
772: * The library works as an extension that configures a set of handlers
773: * making the library to open XML documents and translating them into
774: * UTF-8 if required.
775: *
776: * To activate the library, you must use \ref axl_babel_init at the
777: * beginning of your application or library. Here is an example:
778: *
779: * \code
780: * // optional axlError declaration
781: * axlError * error;
782: *
783: * // init axl babel
784: * if (! axl_babel_init (&error)) {
785: * printf ("Failed to start axl babel: %s...\n",
786: * axl_error_get (error));
787: * axl_error_free (error);
788: * return axl_false;
789: * }
790: * \endcode
791: *
792: * Once done, every call to the base API (such \ref axl_doc_parse,
793: * \ref axl_doc_parse_from_file) will open the document as usual. It
794: * is not required to perform any additional special operation.
795: *
796: * It is not required to call to \ref axl_babel_finish on application
797: * exit. However, in the case you want to deactivate
798: * <b>libaxl-babel</b> but still keep on using axl base library, you
799: * can use \ref axl_babel_finish.
800: *
801: * See \ref axl_babel_init for currently supported formats.
802: *
803: * \section reducing_foot_print How to reduce the library footprint
804: *
805: * Axl Library is implemented in a modular way to ensure you are only
806: * linked against those software elements that you really
807: * require. Additionally, the library allows the following to reduce
808: * the library footprint to the minimum:
809: *
810: * <ul>
811: * <li><b>Remove log information:</b> <br> Axl library uses a console log
812: * mechanism to report what's happening during processing. See \ref
813: * axl_log_module "Axl Log reporting" module for more
814: * information. However, under production environments this console
815: * log isn't necessary, so you can safely remove it, at compile time,
816: * using <b>--axl-log-disable</b> as follows:
817: *
818: * \code
819: * >> ./configure --axl-log-disable
820: * \endcode
821: *
822: * According to our results, the library including the log to console
823: * information is about <b>366K</b>. Without log to console information the
824: * library takes about <b>288K</b>.
825: *
826: * </li>
827: *
828: * <li><b>Remove debugging information from the library: </b> <br>You
829: * can also remove debugging information from your library on
830: * production environments by doing the following once the compilation process has finished:
831: *
832: * \code
833: * >> make install-strip
834: * \endcode
835: *
836: * According to our results, the library without log to console and
837: * debugging information takes about <b>100K</b>.
838: *
839: * </li>
840: * </ul>
841: *
842: * The previous information applies to the Axl base Library
843: * (libaxl.so/.dll), however the same happens for the rest of software
844: * components bundled with Axl.
845: *
846: *
847: * \section futher Further reading: where to go for more information
848: *
849: * You can also check \ref axl_api "API documentation" for a complete
850: * detailed explanation about the library.
851: *
852: * Please, if you find that something isn't properly documented or you
853: * think that something could be improved, contact us in the mailing
854: * list. We are building Axl Library with the aim to produce a high
855: * quality, commercial grade, open source XML development kit, so, any
856: * help received will be welcome.
857: *
858: * Remember you can always contact us on the mailing list for any
859: * question not properly answered by this documentation. See <a
860: * href="http://www.aspl.es/axl/doc.html">Axl Library website
861: * documentation</a> to get more
862: * information about the mailing list.
863: *
864: */
865:
866: /**
867: * \page axl_api LibAxl API documentation
868: *
869: * Here is the API for the modules defined inside the library:
870: *
871: * <ul>
872: * <li><b>Basic API to interact with XML documents (base library libaxl): </b>
873: *
874: * - \ref axl_module
875: * - \ref axl_doc_module
876: * - \ref axl_node_module
877: * - \ref axl_node_attribute_cursor
878: * - \ref axl_node_annotate
879: * - \ref axl_item_module
880: * - \ref axl_dtd_module
881: * - \ref axl_decl_module
882: * - \ref axl_handlers
883: *
884: * </li>
885: * <li><b>XML Namespaces API (required additional library libaxl-ns): </b>
886: *
887: * - \ref axl_ns_doc_module
888: * - \ref axl_ns_node_module
889: *
890: * </li>
891: * <li><b>Axl BABEL API (required to support additional encoding formats): </b>
892: *
893: * - \ref axl_babel
894: *
895: * </li>
896: * <li><b>Error reporting and debugging functions (base library libaxl): </b>
897: *
898: * - \ref axl_error_module
899: * - \ref axl_log_module
900: * </li>
901: * <li><b>Auxiliary modules, supporting data types, string handling, etc (base library libaxl):</b>
902: *
903: * - \ref axl_stream_module
904: * - \ref axl_list_module
905: * - \ref axl_list_cursor_module
906: * - \ref axl_stack_module
907: * - \ref axl_binary_stack_module
908: * - \ref axl_hash_module
909: * - \ref axl_hash_cursor_module
910: * - \ref axl_string_module
911: *
912: * </li>
913: * </ul>
914: *
915: */
916:
917: /**
918: * @page axl_install Installing Axl library and using it
919: *
920: * \section intro Introduction
921: *
922: * Axl library is an XML library written in ANSI C, which is known to
923: * work on Microsoft Windows, GNU/Linux, *-BSD, GNU/Linux under
924: * Amd64/Itanium and MacOS/X platforms.
925: *
926: * The library package is composed of the following items:
927: *
928: * \image html axl-components.png "Axl library components"
929: *
930: * - <b>libaxl</b>: base library implementing all XML functions, and
931: * common API used by extension libraries. It has no external
932: * dependencies.
933: *
934: * - <b>libaxl-ns</b>: optional library, built on top of libaxl, which
935: * provides Namespaces support. It depends on libaxl.
936: *
937: * - <b>libaxl-babel</b>: optional library, built on top of libaxl,
938: * which provides extended encoding support beyond the default UTF-8.
939: *
940: * - <b>axl-knife</b>: command line tool, built on top of libaxl and
941: * libaxl-ns. It depends on the base library and the namespace
942: * library.
943: *
944: * Here are a set of instructions to get the library compiled for your
945: * platform:
946: *
947: * \section axl_on_unix GNU/Linux (or any posix OS) installation instructions
948: *
949: * First, download the package from the download section. Check <a
950: * href="http://www.aspl.es/axl/doc.html">this section to know more
951: * about this.</a>
952: *
953: * Then use the standard procedure to compile an autotools-ready
954: * package. Here are the steps:
955: *
956: * \code
957: * >> tar xzvf axl-0.2.1.b1984.g1985.tar.gz
958: * >> cd axl-0.2.1.b1984.g1985/
959: * >> ./configure
960: * >> make
961: * >> make install
962: * \endcode
963: *
964: * \section checking_axl Checking Axl Library compilation (regression test)
965: *
966: * Once finished, to check that your axl build works properly, get inside
967: * the test/ directory and run the axl regression test (test_01):
968: * \code
969: * >> cd test/
970: * >> ./test_01
971: * \endcode
972: *
973: * All tests must execute properly to ensure the library is fully functional.
974: *
975: * \section windows Windows installation instructions
976: *
977: * Axl library comes with packages already built for Microsoft Windows platforms.
978: * Check <a href="http://www.aspl.es/axl/doc.html">the download section to get them</a>.
979: *
980: * \section using Using LibAxl library (installations with pkg-config infrastructure)
981: *
982: * Axl Library is composed of a base library, which provides all XML
983: * 1.0 functions. Additionally, a separate library is provided for
984: * Namespace functions.
985: *
986: * Axl library comes with pkg-config support, which makes it easy to use
987: * inside your pkg-config enabled projects.
988: *
989: * To check current support for your Axl Library you can execute:
990: *
991: * \code
992: * >> pkg-config --libs --cflags axl
993: * \endcode
994: *
995: * To give support for XML Namespaces (libaxl-ns), you must use:
996: * \code
997: * >> pkg-config --libs --cflags axl-ns
998: * \endcode
999: *
1000: * To give support for extended encoding support (libaxl-babel), you must use:
1001: * \code
1002: * >> pkg-config --libs --cflags axl-babel
1003: * \endcode
1004: *
1005: * To include support into your autotool checks (configure.ac/in
1006: * files) you can place the following piece:
1007: * \code
1008: * PKG_CHECK_MODULES(LIBRARIES, axl)
1009: * \endcode
1010: *
1011: * Again, add <b>axl-ns</b> or <b>axl-babel</b> to the previous
1012: * instruction if you want your package to also check for Axl Library
1013: * Namespace or BABEL support.
1014: *
1015: * \section including_headers Including Axl Library headers
1016: *
1017: * For all platforms, Axl Library base headers have to be included as follows:
1018: * \code
1019: * #include <axl.h>
1020: * \endcode
1021: *
1022: * In the case Namespace is required, you must use:
1023: * \code
1024: * #include <axl_ns.h>
1025: * \endcode
1026: *
1027: * To include babel support, you must use:
1028: * \code
1029: * #include <axl_babel.h>
1030: * \endcode
1031: */
1032:
1033:
1034: /**
1035: * \defgroup axl_module Axl main: Init functions for the library
1036: */
1037:
1038: /**
1039: * \addtogroup axl_module
1040: * @{
1041: */
1042:
1043: /**
1044: * @brief Initializes the Axl library.
1045: *
1046: * Currently calling this function is not required because the
1047: * libaxl implementation performs no initialization here. The
1048: * implementation will try to keep avoiding internal variable initialization.
1049: *
1050: * However, the API is provided for future usage.
1051: *
1052: * @return The current implementation always returns \ref axl_true;
1053: * \ref axl_false is the value documented for an initialization failure.
1054: */
1055: axl_bool axl_init (void)
1056: {
1057: /* nothing to initialize: this function only reports success */
1058: return axl_true;
1059: }
1060:
1061: /**
1062: * @brief Terminates the libaxl library.
1063: *
1064: * Currently calling this function is not required because nothing is
1065: * done to stop libaxl. The implementation will try to keep it that
1066: * way; however, in the future this call could be required.
1067: */
1068: void axl_end (void)
1069: {
1070: /* nothing to do: the function returns immediately */
1071: return;
1072: }
1073:
1074: /* @} */
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>