embedaddon/libxml2/test/intsubset2.xml - annotate

Return to intsubset2.xml CVS log
Up to [ELWIX - Embedded LightWeight unIX -] / embedaddon / libxml2 / test
Annotation of embedaddon/libxml2/test/intsubset2.xml, revision 1.1.1.1

1.1       misho       1: <?xml version="1.0"?>
                      2: <!DOCTYPE kanjidic2 [
                      3:        <!-- Version 1.3
                      4:        This is the DTD of the XML-format kanji file combining information from
                      5:        the KANJIDIC and KANJD212 files. It is intended to be largely self-
                      6:        documenting, with each field being accompanied by an explanatory
                      7:        comment.
                      8: 
                      9:        The file covers the following kanji:
                     10:        (a) the 6,355 kanji from JIS X 0208;
                     11:        (b) the 5,801 kanji from JIS X 0212;
                     12:        (c) the 3,625 kanji from JIS X 0213 as follows:
                     13:                (i) the 2,741 kanji which are also in JIS X 0212 have
                     14:                JIS X 0213 code-points (kuten) added to the existing entry;
                     15:                (ii) the 884 "new" kanji have new entries.
                     16: 
                     17:        At the end of the explanation for a number of fields there is a tag
                     18:        with the format [N]. This indicates the leading letter(s) of the
                     19:        equivalent field in the KANJIDIC and KANJD212 files.
                     20: 
                     21:        The KANJIDIC documentation should also be read for additional
                     22:        information about the contents of the file.
                     23:        -->
                     24: <!ELEMENT kanjidic2 (header,character*)>
                     25: <!ELEMENT header (file_version,database_version,date_of_creation)>
                     26: <!--
                     27:        The single header element will contain identification information
                     28:        about the version of the file 
                     29:        -->
                     30: <!ELEMENT file_version (#PCDATA)>
                     31: <!--
                     32:        This field denotes the version of kanjidic2 structure, as more
                     33:        than one version may exist.
                     34:        -->
                     35: <!ELEMENT database_version (#PCDATA)>
                     36: <!--
                     37:        The version of the file, in the format YYYY-NN, where NN will be
                     38:        a number starting with 01 for the first version released in a
                     39:        calendar year, then increasing for each version in that year.
                     40:        -->
                     41: <!ELEMENT date_of_creation (#PCDATA)>
                     42: <!--
                     43:        The date the file was created in international format (YYYY-MM-DD).
                     44:        -->
                     45: <!ELEMENT character (literal,codepoint, radical, misc, dic_number?, query_code?, reading_meaning?,nanori?)*>
                     46: <!ELEMENT literal (#PCDATA)>
                     47: <!--
                     48:        The character itself in UTF-8 encoding.
                     49:        -->
                     50: <!ELEMENT codepoint (cp_value+)>
                     51:        <!-- 
                     52:        The codepoint element states the code of the character in the various
                     53:        character set standards.
                     54:        -->
                     55: <!ELEMENT cp_value (#PCDATA)>
                     56:        <!-- 
                     57:        The cp_value contains the codepoint of the character in a particular
                     58:        standard. The standard will be identified in the cp_type attribute.
                     59:        -->
                     60: <!ATTLIST cp_value cp_type CDATA #REQUIRED>
                     61:        <!-- 
                     62:        The cp_type attribute states the coding standard applying to the
                     63:        element. The values assigned so far are:
                     64:                jis208 - JIS X 0208-1997 - kuten coding (nn-nn)
                     65:                jis212 - JIS X 0212-1990 - kuten coding (nn-nn)
                     66:                jis213 - JIS X 0213-2000 - kuten coding (p-nn-nn)
                     67:                ucs - Unicode 4.0 - hex coding (4 or 5 hexadecimal digits)
                     68:        -->
                     69: <!ELEMENT radical (rad_value+)>
                     70: <!ELEMENT rad_value (#PCDATA)>
                     71:        <!-- 
                     72:        The radical number, in the range 1 to 214. The particular
                     73:        classification type is stated in the rad_type attribute.
                     74:        -->
                     75: <!ATTLIST rad_value rad_type CDATA #REQUIRED>
                     76:        <!-- 
                     77:        The rad_type attribute states the type of radical classification.
                     78:                classical - as recorded in the KangXi Zidian.
                     79:                nelson - as used in the Nelson "Modern Japanese-English 
                     80:                Character Dictionary" (i.e. the Classic, not the New Nelson).
                     81:                This will only be used where Nelson reclassified the kanji.
                     82:        -->
                     83: <!ELEMENT misc (grade?, stroke_count+, variant*, freq*, rad_name*)>
                     84: <!ELEMENT grade (#PCDATA)>
                     85:        <!-- 
                     86:        The Jouyou Kanji grade level. 1 through 6 indicate the grade in which
                     87:        the kanji is taught in Japanese schools. 8 indicates it is one of the
                     88:        remaining Jouyou Kanji to be learned in junior high school, and 9 
                     89:        indicates it is a Jinmeiyou (for use in names) kanji. [G]
                     90:        -->
                     91: <!ELEMENT stroke_count (#PCDATA)>
                     92:        <!-- 
                     93:        The stroke count of the kanji, including the radical. If more than 
                     94:        one, the first is considered the accepted count, while subsequent ones 
                     95:        are common miscounts. (See Appendix E. of the KANJIDIC documentation
                     96:        for some of the rules applied when counting strokes in some of the 
                     97:        radicals.) [S]
                     98:        -->
                     99: <!ELEMENT variant (#PCDATA)>
                    100:        <!-- 
                    101:        A cross-reference code to another kanji, usually regarded as a variant.
                    102:        The type of cross-reference is given in the var_type attribute.
                    103:        -->
                    104: <!ATTLIST variant var_type CDATA #REQUIRED>
                    105:        <!-- 
                    106:        The var_type attribute indicates the type of variant code. The current
                    107:        values are: 
                    108:                jis208 - in JIS X 0208 - kuten coding
                    109:                jis212 - in JIS X 0212 - kuten coding
                    110:                jis213 - in JIS X 0213 - kuten coding
                    111:                deroo - De Roo number - numeric
                    112:                njecd - Halpern NJECD index number - numeric
                    113:                s_h - The Kanji Dictionary (Spahn & Hadamitzky) - descriptor
                    114:                nelson - "Classic" Nelson - numeric
                    115:                oneill - Japanese Names (O'Neill) - numeric
                    116:        -->
                    117: <!ELEMENT freq (#PCDATA)>
                    118:        <!-- 
                    119:        A frequency-of-use ranking. The 2,500 most-used characters have a 
                    120:        ranking; those characters that lack this field are not ranked. The 
                    121:        frequency is a number from 1 to 2,500 that expresses the relative 
                    122:        frequency of occurrence of a character in modern Japanese. This is
                    123:        based on a survey in newspapers, so it is biased towards kanji
                    124:        used in newspaper articles. The discrimination between the less
                    125:        frequently used kanji is not strong.
                    126:        -->
                    127: <!ELEMENT rad_name (#PCDATA)>
                    128:        <!-- 
                    129:        When the kanji is itself a radical and has a name, this element
                    130:        contains the name (in hiragana). [T2]
                    131:        -->
                    132: <!ELEMENT dic_number (dic_ref+)>
                    133:        <!-- 
                    134:        This element contains the index numbers and similar unstructured
                    135:        information such as page numbers in a number of published dictionaries,
                    136:        and instructional books on kanji.
                    137:        -->
                    138: <!ELEMENT dic_ref (#PCDATA)>
                    139:        <!-- 
                    140:        Each dic_ref contains an index number. The particular dictionary,
                    141:        etc. is defined by the dr_type attribute.
                    142:        -->
                    143: <!ATTLIST dic_ref dr_type CDATA #REQUIRED>
                    144:        <!-- 
                    145:        The dr_type defines the dictionary or reference book, etc. to which
                    146:        the dic_ref element applies. The initial allocation is:
                    147:          nelson_c - "Modern Reader's Japanese-English Character Dictionary",  
                    148:                edited by Andrew Nelson (now published as the "Classic" 
                    149:                Nelson).
                    150:          nelson_n - "The New Nelson Japanese-English Character Dictionary", 
                    151:                edited by John Haig.
                    152:          halpern_njecd - "New Japanese-English Character Dictionary", 
                    153:                edited by Jack Halpern.
                    154:          halpern_kkld - "Kanji Learners Dictionary" (Kodansha) edited by 
                    155:                Jack Halpern.
                    156:          heisig - "Remembering The  Kanji"  by  James Heisig.
                    157:          gakken - "A  New Dictionary of Kanji Usage" (Gakken)
                    158:          oneill_names - "Japanese Names", by P.G. O'Neill. 
                    159:          oneill_kk - "Essential Kanji" by P.G. O'Neill.
                    160:          moro - "Daikanwajiten" compiled by Morohashi. For some kanji two
                    161:                additional attributes are used: m_vol:  the volume of the
                    162:                dictionary in which the kanji is found, and m_page: the page
                    163:                number in the volume.
                    164:          henshall - "A Guide To Remembering Japanese Characters" by
                    165:                Kenneth G.  Henshall.
                    166:          sh_kk - "Kanji and Kana" by Spahn and Hadamitzky.
                    167:          sakade - "A Guide To Reading and Writing Japanese" edited by
                    168:                Florence Sakade.
                    169:          henshall3 - "A Guide To Reading and Writing Japanese" 3rd
                    170:                edition, edited by Henshall, Seeley and De Groot.
                    171:          tutt_cards - Tuttle Kanji Cards, compiled by Alexander Kask.
                    172:          crowley - "The Kanji Way to Japanese Language Power" by
                    173:                Dale Crowley.
                    174:          kanji_in_context - "Kanji in Context" by Nishiguchi and Kono.
                    175:          busy_people - "Japanese For Busy People" vols I-III, published
                    176:                by the AJLT. The codes are the volume.chapter.
                    177:          kodansha_compact - the "Kodansha Compact Kanji Guide".
                    178:        -->
                    179: <!ATTLIST dic_ref m_vol CDATA #IMPLIED>
                    180:        <!-- 
                    181:        See above under "moro".
                    182:        -->
                    183: <!ATTLIST dic_ref m_page CDATA #IMPLIED>
                    184:        <!-- 
                    185:        See above under "moro".
                    186:        -->
                    187: <!ELEMENT query_code (q_code+)>
                    188:        <!-- 
                    189:        These codes contain information relating to the glyph, and can be used
                    190:        for finding a required kanji. The type of code is defined by the
                    191:        qc_type attribute.
                    192:        -->
                    193: <!ELEMENT q_code (#PCDATA)>
                    194:        <!--
                    195:        The q_code contains the actual query-code value, according to the
                    196:        qc_type attribute.
                    197:        -->
                    198: <!ATTLIST q_code qc_type CDATA #REQUIRED>
                    199:        <!-- 
                    200:        The qc_type attribute defines the type of query code. The current values
                    201:        are:
                    202:          skip -  Halpern's SKIP (System  of  Kanji  Indexing  by  Patterns) 
                    203:                code. The  format is n-nn-nn.  See the KANJIDIC  documentation 
                    204:                for  a description of the code and restrictions on  the 
                    205:                commercial  use  of this data. [P]
                    206: 
                    207:          sh_desc - the descriptor codes for The Kanji Dictionary (Tuttle 
                    208:                1996) by Spahn and Hadamitzky. They are in the form nxnn.n,  
                    209:                e.g.  3k11.2, where the  kanji has 3 strokes in the 
                    210:                identifying radical, it is radical "k" in the SH 
                    211:                classification system, there are 11 other strokes, and it is 
                    212:                the 2nd kanji in the 3k11 sequence. (I am very grateful to 
                    213:                Mark Spahn for providing the list of these descriptor codes 
                    214:                for the kanji in this file.) [I]
                    215:          four_corner - the "Four Corner" code for the kanji. This is a code 
                    216:                invented by Wang Chen in 1928. See the KANJIDIC documentation 
                    217:                for  an overview of  the Four Corner System. [Q]
                    218: 
                    219:          deroo - the codes developed by the late Father Joseph De Roo, and 
                    220:                published in  his book "2001 Kanji" (Bojinsha). Fr De Roo 
                    221:                gave his permission for these codes to be included. [DR]
                    222:          misclass - a possible misclassification of the kanji according
                    223:                to one of the code types. (See the "Z" codes in the KANJIDIC
                    224:                documentation for more details.)
                    225:          
                    226:        -->
                    227: <!ELEMENT reading_meaning (rmgroup*, nanori*)>
                    228:        <!-- 
                    229:        The readings for the kanji in several languages, and the meanings, also
                    230:        in several languages. The readings and meanings are grouped to enable
                    231:        the handling of the situation where the meaning is differentiated by 
                    232:        reading. [T1]
                    233:        -->
                    234: <!ELEMENT nanori (#PCDATA)>
                    235:        <!-- 
                    236:        Japanese readings that are now only associated with names.
                    237:        -->
                    238: <!ELEMENT rmgroup (reading*, meaning*)>
                    239: <!ELEMENT reading (#PCDATA)>
                    240:        <!-- 
                    241:        The reading element contains the reading or pronunciation
                    242:        of the kanji.
                    243:        -->
                    244: <!ATTLIST reading r_type CDATA #REQUIRED>
                    245:        <!-- 
                    246:        The r_type attribute defines the type of reading in the reading
                    247:        element. The current values are:
                    248:          pinyin - the modern PinYin romanization of the Chinese reading 
                    249:                of the kanji. The tones are represented by a concluding 
                    250:                digit. [Y]
                    251:          korean_r - the romanized form of the Korean reading(s) of the 
                    252:                kanji.  The readings are in the (Republic of Korea) Ministry 
                    253:                of Education style of romanization. [W]
                    254:          korean_h - the Korean reading(s) of the kanji in hangul.
                    255:          ja_on - the "on" Japanese reading of the kanji, in katakana. A
                    256:                second attribute r_status, if present, will indicate with
                    257:                a value of "jy" whether the reading is approved for a
                    258:                "Jouyou kanji".
                    259:          ja_kun - the "kun" Japanese reading of the kanji, in hiragana. 
                    260:                Where relevant the okurigana is also included separated by a 
                    261:                ".". Readings associated with prefixes and suffixes are 
                    262:                marked with a "-". A second attribute r_status, if present, 
                    263:                will indicate with a value of "jy" whether the reading is 
                    264:                approved for a "Jouyou kanji".
                    265:        -->
                    266: <!ATTLIST reading r_status CDATA #IMPLIED>
                    267:        <!-- 
                    268:        See under ja_on and ja_kun above.
                    269:        -->
                    270: <!ELEMENT meaning (#PCDATA)>
                    271:        <!-- 
                    272:        The meaning associated with the kanji.
                    273:        -->
                    274: <!ATTLIST meaning m_lang CDATA #IMPLIED>
                    275:        <!-- 
                    276:        The m_lang attribute defines the target language of the meaning. It 
                    277:        will be coded using the two-letter language code from the ISO 639 
                    278:        standard. When absent, the value "en" (i.e. English) is implied. [{}]
                    279:        -->
                    280: ] >
                    281: <kanjidic2>
                    282: </kanjidic2>
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>