File:  [ELWIX - Embedded LightWeight unIX -] / embedaddon / libxml2 / test / intsubset2.xml
Revision 1.1.1.1 (vendor branch): download - view: text, annotated - select for diffs - revision graph
Tue Feb 21 23:37:59 2012 UTC (12 years, 4 months ago) by misho
Branches: libxml2, MAIN
CVS tags: v2_9_1p0, v2_9_1, v2_8_0p0, v2_8_0, v2_7_8, HEAD
libxml2

    1: <?xml version="1.0"?>
    2: <!DOCTYPE kanjidic2 [
    3: 	<!-- Version 1.3
    4: 	This is the DTD of the XML-format kanji file combining information from
    5: 	the KANJIDIC and KANJD212 files. It is intended to be largely self-
    6: 	documenting, with each field being accompanied by an explanatory
    7: 	comment.
    8: 
    9: 	The file covers the following kanji:
   10: 	(a) the 6,355 kanji from JIS X 0208;
   11: 	(b) the 5,801 kanji from JIS X 0212;
   12: 	(c) the 3,625 kanji from JIS X 0213 as follows:
   13: 		(i) the 2,741 kanji which are also in JIS X 0212 have
   14: 		JIS X 0213 code-points (kuten) added to the existing entry;
   15: 		(ii) the 884 "new" kanji have new entries.
   16: 
   17: 	At the end of the explanation for a number of fields there is a tag
   18: 	with the format [N]. This indicates the leading letter(s) of the
   19: 	equivalent field in the KANJIDIC and KANJD212 files.
   20: 
   21: 	The KANJIDIC documentation should also be read for additional
   22: 	information about the data in the file.
   23: 	-->
   24: <!ELEMENT kanjidic2 (header,character*)>
<!--
	Root element. Its content model is exactly one header element followed
	by zero or more character elements.
	NOTE(review): the instance document at the end of this file is an empty
	kanjidic2 element, which does not satisfy this model; presumably that
	is acceptable for a parser test. Confirm.
	-->
   25: <!ELEMENT header (file_version,database_version,date_of_creation)>
   26: <!--
   27: 	The single header element will contain identification information
   28: 	about the version of the file 
   29: 	-->
   30: <!ELEMENT file_version (#PCDATA)>
   31: <!--
   32: 	This field denotes the version of kanjidic2 structure, as more
   33: 	than one version may exist.
   34: 	-->
   35: <!ELEMENT database_version (#PCDATA)>
   36: <!--
   37: 	The version of the file, in the format YYYY-NN, where NN will be
   38: 	a number starting with 01 for the first version released in a
   39: 	calendar year, then increasing for each version in that year.
   40: 	-->
   41: <!ELEMENT date_of_creation (#PCDATA)>
   42: <!--
   43: 	The date the file was created in international format (YYYY-MM-DD).
   44: 	-->
   45: <!ELEMENT character (literal,codepoint, radical, misc, dic_number?, query_code?, reading_meaning?,nanori?)*>
<!--
	Content model of a character element: a repeatable group made up of
	literal, codepoint, radical and misc (each required), followed by the
	optional dic_number, query_code, reading_meaning and nanori elements,
	in that order.
	NOTE(review): the whole group carries a trailing "*", and nanori is
	allowed both here and inside reading_meaning. Confirm that both are
	intended.
	-->
   46: <!ELEMENT literal (#PCDATA)>
   47: <!--
   48: 	The character itself in UTF-8 coding.
   49: 	-->
   50: <!ELEMENT codepoint (cp_value+)>
   51: 	<!-- 
   52: 	The codepoint element states the code of the character in the various
   53: 	character set standards.
   54: 	-->
   55: <!ELEMENT cp_value (#PCDATA)>
   56: 	<!-- 
   57: 	The cp_value contains the codepoint of the character in a particular
   58: 	standard. The standard will be identified in the cp_type attribute.
   59: 	-->
   60: <!ATTLIST cp_value cp_type CDATA #REQUIRED>
   61: 	<!-- 
   62: 	The cp_type attribute states the coding standard applying to the
   63: 	element. The values assigned so far are:
   64: 		jis208 - JIS X 0208-1997 - kuten coding (nn-nn)
   65: 		jis212 - JIS X 0212-1990 - kuten coding (nn-nn)
   66: 		jis213 - JIS X 0213-2000 - kuten coding (p-nn-nn)
   67: 		ucs - Unicode 4.0 - hex coding (4 or 5 hexadecimal digits)
   68: 	-->
   69: <!ELEMENT radical (rad_value+)>
	<!--
	The radical element contains one or more rad_value elements.
	-->
   70: <!ELEMENT rad_value (#PCDATA)>
   71: 	<!-- 
   72: 	The radical number, in the range 1 to 214. The particular
   73: 	classification type is stated in the rad_type attribute.
   74: 	-->
   75: <!ATTLIST rad_value rad_type CDATA #REQUIRED>
   76: 	<!-- 
   77: 	The rad_type attribute states the type of radical classification.
   78: 		classical - as recorded in the KangXi Zidian.
   79: 		nelson - as used in the Nelson "Modern Japanese-English 
   80: 		Character Dictionary" (i.e. the Classic, not the New Nelson).
   81: 		This will only be used where Nelson reclassified the kanji.
   82: 	-->
   83: <!ELEMENT misc (grade?, stroke_count+, variant*, freq*, rad_name*)>
	<!--
	The misc element contains, in this order: an optional grade, one or
	more stroke_count elements, and then any number of variant, freq and
	rad_name elements.
	-->
   84: <!ELEMENT grade (#PCDATA)>
   85: 	<!-- 
   86: 	The Jouyou Kanji grade level. 1 through 6 indicate the grade in which
   87: 	the kanji is taught in Japanese schools. 8 indicates it is one of the
   88: 	remaining Jouyou Kanji to be learned in junior high school, and 9 
   89: 	indicates it is a Jinmeiyou (for use in names) kanji. [G]
   90: 	-->
   91: <!ELEMENT stroke_count (#PCDATA)>
   92: 	<!-- 
   93: 	The stroke count of the kanji, including the radical. If more than 
   94: 	one, the first is considered the accepted count, while subsequent ones 
   95: 	are common miscounts. (See Appendix E. of the KANJIDIC documentation
   96: 	for some of the rules applied when counting strokes in some of the 
   97: 	radicals.) [S]
   98: 	-->
   99: <!ELEMENT variant (#PCDATA)>
  100: 	<!-- 
  101: 	A cross-reference code to another kanji, usually regarded as a variant.
  102: 	The type of cross-reference is given in the var_type attribute.
  103: 	-->
  104: <!ATTLIST variant var_type CDATA #REQUIRED>
  105: 	<!-- 
  106: 	The var_type attribute indicates the type of variant code. The current
  107: 	values are: 
  108: 		jis208 - in JIS X 0208 - kuten coding
  109: 		jis212 - in JIS X 0212 - kuten coding
  110: 		jis213 - in JIS X 0213 - kuten coding
  111: 		deroo - De Roo number - numeric
  112: 		njecd - Halpern NJECD index number - numeric
  113: 		s_h - The Kanji Dictionary (Spahn & Hadamitzky) - descriptor
  114: 		nelson - "Classic" Nelson - numeric
  115: 		oneill - Japanese Names (O'Neill) - numeric
  116: 	-->
  117: <!ELEMENT freq (#PCDATA)>
  118: 	<!-- 
  119: 	A frequency-of-use ranking. The 2,500 most-used characters have a 
  120: 	ranking; those characters that lack this field are not ranked. The 
  121: 	frequency is a number from 1 to 2,500 that expresses the relative 
  122: 	frequency of occurrence of a character in modern Japanese. This is
  123: 	based on a survey in newspapers, so it is biased towards kanji
  124: 	used in newspaper articles. The discrimination between the less
  125: 	frequently used kanji is not strong.
  126: 	-->
  127: <!ELEMENT rad_name (#PCDATA)>
  128: 	<!-- 
  129: 	When the kanji is itself a radical and has a name, this element
  130: 	contains the name (in hiragana). [T2]
  131: 	-->
  132: <!ELEMENT dic_number (dic_ref+)>
  133: 	<!-- 
  134: 	This element contains the index numbers and similar unstructured
  135: 	information such as page numbers in a number of published dictionaries,
  136: 	and instructional books on kanji.
  137: 	-->
  138: <!ELEMENT dic_ref (#PCDATA)>
  139: 	<!-- 
  140: 	Each dic_ref contains an index number. The particular dictionary,
  141: 	etc. is defined by the dr_type attribute.
  142: 	-->
  143: <!ATTLIST dic_ref dr_type CDATA #REQUIRED>
  144: 	<!-- 
  145: 	The dr_type defines the dictionary or reference book, etc. to which
  146: 	the dic_ref element applies. The initial allocation is:
  147: 	  nelson_c - "Modern Reader's Japanese-English Character Dictionary",  
  148: 	  	edited by Andrew Nelson (now published as the "Classic" 
  149: 	  	Nelson).
  150: 	  nelson_n - "The New Nelson Japanese-English Character Dictionary", 
  151: 	  	edited by John Haig.
  152: 	  halpern_njecd - "New Japanese-English Character Dictionary", 
  153: 	  	edited by Jack Halpern.
  154: 	  halpern_kkld - "Kanji Learner's Dictionary" (Kodansha) edited by
  155: 	  	Jack Halpern.
  156: 	  heisig - "Remembering The Kanji" by James Heisig.
  157: 	  gakken - "A New Dictionary of Kanji Usage" (Gakken)
  158: 	  oneill_names - "Japanese Names", by P.G. O'Neill. 
  159: 	  oneill_kk - "Essential Kanji" by P.G. O'Neill.
  160: 	  moro - "Daikanwajiten" compiled by Morohashi. For some kanji two
  161: 	  	additional attributes are used: m_vol:  the volume of the
  162: 	  	dictionary in which the kanji is found, and m_page: the page
  163: 	  	number in the volume.
  164: 	  henshall - "A Guide To Remembering Japanese Characters" by
  165: 	  	Kenneth G.  Henshall.
  166: 	  sh_kk - "Kanji and Kana" by Spahn and Hadamitzky.
  167: 	  sakade - "A Guide To Reading and Writing Japanese" edited by
  168: 	  	Florence Sakade.
  169: 	  henshall3 - "A Guide To Reading and Writing Japanese" 3rd
  170: 		edition, edited by Henshall, Seeley and De Groot.
  171: 	  tutt_cards - Tuttle Kanji Cards, compiled by Alexander Kask.
  172: 	  crowley - "The Kanji Way to Japanese Language Power" by
  173: 	  	Dale Crowley.
  174: 	  kanji_in_context - "Kanji in Context" by Nishiguchi and Kono.
  175: 	  busy_people - "Japanese For Busy People" vols I-III, published
  176: 		by the AJLT. The codes are the volume.chapter.
  177: 	  kodansha_compact - the "Kodansha Compact Kanji Guide".
  178: 	-->
  179: <!ATTLIST dic_ref m_vol CDATA #IMPLIED>
  180: 	<!-- 
  181: 	See above under "moro".
  182: 	-->
  183: <!ATTLIST dic_ref m_page CDATA #IMPLIED>
  184: 	<!-- 
  185: 	See above under "moro".
  186: 	-->
  187: <!ELEMENT query_code (q_code+)>
  188: 	<!-- 
  189: 	These codes contain information relating to the glyph, and can be used
  190: 	for finding a required kanji. The type of code is defined by the
  191: 	qc_type attribute.
  192: 	-->
  193: <!ELEMENT q_code (#PCDATA)>
  194: 	<!--
  195: 	The q_code contains the actual query-code value, according to the
  196: 	qc_type attribute.
  197: 	-->
  198: <!ATTLIST q_code qc_type CDATA #REQUIRED>
  199: 	<!-- 
  200: 	The qc_type attribute defines the type of query code. The current values
  201: 	are:
  202: 	  skip - Halpern's SKIP (System of Kanji Indexing by Patterns)
  203: 	  	code. The format is n-nn-nn. See the KANJIDIC documentation
  204: 	  	for a description of the code and restrictions on the
  205: 	  	commercial use of this data. [P]
  206: 
  207: 	  sh_desc - the descriptor codes for The Kanji Dictionary (Tuttle 
  208: 	  	1996) by Spahn and Hadamitzky. They are in the form nxnn.n,  
  209: 	  	e.g.  3k11.2, where the  kanji has 3 strokes in the 
  210: 	  	identifying radical, it is radical "k" in the SH 
  211: 	  	classification system, there are 11 other strokes, and it is 
  212: 	  	the 2nd kanji in the 3k11 sequence. (I am very grateful to 
  213: 	  	Mark Spahn for providing the list of these descriptor codes 
  214: 	  	for the kanji in this file.) [I]
  215: 	  four_corner - the "Four Corner" code for the kanji. This is a code 
  216: 	  	invented by Wang Chen in 1928. See the KANJIDIC documentation 
  217: 	  	for an overview of the Four Corner System. [Q]
  218: 
  219: 	  deroo - the codes developed by the late Father Joseph De Roo, and 
  220: 	  	published in  his book "2001 Kanji" (Bojinsha). Fr De Roo 
  221: 	  	gave his permission for these codes to be included. [DR]
  222: 	  misclass - a possible misclassification of the kanji according
  223: 		to one of the code types. (See the "Z" codes in the KANJIDIC
  224: 		documentation for more details.)
  225: 	  
  226: 	-->
  227: <!ELEMENT reading_meaning (rmgroup*, nanori*)>
  228: 	<!-- 
  229: 	The readings for the kanji in several languages, and the meanings, also
  230: 	in several languages. The readings and meanings are grouped to enable
  231: 	the handling of the situation where the meaning is differentiated by 
  232: 	reading. [T1]
  233: 	-->
  234: <!ELEMENT nanori (#PCDATA)>
  235: 	<!-- 
  236: 	Japanese readings that are now only associated with names.
  237: 	-->
  238: <!ELEMENT rmgroup (reading*, meaning*)>
	<!--
	An rmgroup contains zero or more reading elements followed by zero or
	more meaning elements.
	-->
  239: <!ELEMENT reading (#PCDATA)>
  240: 	<!-- 
  241: 	The reading element contains the reading or pronunciation
  242: 	of the kanji.
  243: 	-->
  244: <!ATTLIST reading r_type CDATA #REQUIRED>
  245: 	<!-- 
  246: 	The r_type attribute defines the type of reading in the reading
  247: 	element. The current values are:
  248: 	  pinyin - the modern PinYin romanization of the Chinese reading 
  249: 	  	of the kanji. The tones are represented by a concluding 
  250: 	  	digit. [Y]
  251: 	  korean_r - the romanized form of the Korean reading(s) of the 
  252: 	  	kanji.  The readings are in the (Republic of Korea) Ministry 
  253: 	  	of Education style of romanization. [W]
  254: 	  korean_h - the Korean reading(s) of the kanji in hangul.
  255: 	  ja_on - the "on" Japanese reading of the kanji, in katakana. A
  256: 	  	second attribute r_status, if present, will indicate with
  257: 	  	a value of "jy" whether the reading is approved for a
  258: 	  	"Jouyou kanji".
  259: 	  ja_kun - the "kun" Japanese reading of the kanji, in hiragana. 
  260: 	  	Where relevant the okurigana is also included separated by a 
  261: 	  	".". Readings associated with prefixes and suffixes are 
  262: 	  	marked with a "-". A second attribute r_status, if present, 
  263: 	  	will indicate with a value of "jy" whether the reading is 
  264: 	  	approved for a "Jouyou kanji".
  265: 	-->
  266: <!ATTLIST reading r_status CDATA #IMPLIED>
  267: 	<!-- 
  268: 	See under ja_on and ja_kun above.
  269: 	-->
  270: <!ELEMENT meaning (#PCDATA)>
  271: 	<!-- 
  272: 	The meaning associated with the kanji.
  273: 	-->
  274: <!ATTLIST meaning m_lang CDATA #IMPLIED>
  275: 	<!-- 
  276: 	The m_lang attribute defines the target language of the meaning. It 
  277: 	will be coded using the two-letter language code from the ISO 639 
  278: 	standard. When absent, the value "en" (i.e. English) is implied. [{}]
  279: 	-->
  280: ] >
  281: <kanjidic2>
  282: </kanjidic2>

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>