Annotation of embedaddon/libxml2/result/intsubset2.xml.sax, revision 1.1

1.1     ! misho       1: SAX.setDocumentLocator()
        !             2: SAX.startDocument()
        !             3: SAX.internalSubset(kanjidic2, , )
        !             4: SAX.comment( Version 1.3
        !             5:        This is the DTD of the XML-format kanji file combining information from
        !             6:        the KANJIDIC and KANJD212 files. It is intended to be largely self-
        !             7:        documenting, with each field being accompanied by an explanatory
        !             8:        comment.
        !             9: 
        !            10:        The file covers the following kanji:
        !            11:        (a) the 6,355 kanji from JIS X 0208;
        !            12:        (b) the 5,801 kanji from JIS X 0212;
        !            13:        (c) the 3,625 kanji from JIS X 0213 as follows:
        !            14:                (i) the 2,741 kanji which are also in JIS X 0212 have
        !            15:                JIS X 0213 code-points (kuten) added to the existing entry;
        !            16:                (ii) the 884 "new" kanji have new entries.
        !            17: 
        !            18:        At the end of the explanation for a number of fields there is a tag
        !            19:        with the format [N]. This indicates the leading letter(s) of the
        !            20:        equivalent field in the KANJIDIC and KANJD212 files.
        !            21: 
        !            22:        The KANJIDIC documentation should also be read for additional 
        !            23:        information about the information in the file.
        !            24:        )
        !            25: SAX.elementDecl(kanjidic2, 4, ...)
        !            26: SAX.elementDecl(header, 4, ...)
        !            27: SAX.comment(
        !            28:        The single header element will contain identification information
        !            29:        about the version of the file 
        !            30:        )
        !            31: SAX.elementDecl(file_version, 3, ...)
        !            32: SAX.comment(
        !            33:        This field denotes the version of kanjidic2 structure, as more
        !            34:        than one version may exist.
        !            35:        )
        !            36: SAX.elementDecl(database_version, 3, ...)
        !            37: SAX.comment(
        !            38:        The version of the file, in the format YYYY-NN, where NN will be
        !            39:        a number starting with 01 for the first version released in a
        !            40:        calendar year, then increasing for each version in that year.
        !            41:        )
        !            42: SAX.elementDecl(date_of_creation, 3, ...)
        !            43: SAX.comment(
        !            44:        The date the file was created in international format (YYYY-MM-DD).
        !            45:        )
        !            46: SAX.elementDecl(character, 4, ...)
        !            47: SAX.elementDecl(literal, 3, ...)
        !            48: SAX.comment(
        !            49:        The character itself in UTF8 coding.
        !            50:        )
        !            51: SAX.elementDecl(codepoint, 4, ...)
        !            52: SAX.comment( 
        !            53:        The codepoint element states the code of the character in the various
        !            54:        character set standards.
        !            55:        )
        !            56: SAX.elementDecl(cp_value, 3, ...)
        !            57: SAX.comment( 
        !            58:        The cp_value contains the codepoint of the character in a particular
        !            59:        standard. The standard will be identified in the cp_type attribute.
        !            60:        )
        !            61: SAX.attributeDecl(cp_value, cp_type, 1, 2, NULL, ...)
        !            62: SAX.comment( 
        !            63:        The cp_type attribute states the coding standard applying to the
        !            64:        element. The values assigned so far are:
        !            65:                jis208 - JIS X 0208-1997 - kuten coding (nn-nn)
        !            66:                jis212 - JIS X 0212-1990 - kuten coding (nn-nn)
        !            67:                jis213 - JIS X 0213-2000 - kuten coding (p-nn-nn)
        !            68:                ucs - Unicode 4.0 - hex coding (4 or 5 hexadecimal digits)
        !            69:        )
        !            70: SAX.elementDecl(radical, 4, ...)
        !            71: SAX.elementDecl(rad_value, 3, ...)
        !            72: SAX.comment( 
        !            73:        The radical number, in the range 1 to 214. The particular
        !            74:        classification type is stated in the rad_type attribute.
        !            75:        )
        !            76: SAX.attributeDecl(rad_value, rad_type, 1, 2, NULL, ...)
        !            77: SAX.comment( 
        !            78:        The rad_type attribute states the type of radical classification.
        !            79:                classical - as recorded in the KangXi Zidian.
        !            80:                nelson - as used in the Nelson "Modern Japanese-English 
        !            81:                Character Dictionary" (i.e. the Classic, not the New Nelson).
        !            82:                This will only be used where Nelson reclassified the kanji.
        !            83:        )
        !            84: SAX.elementDecl(misc, 4, ...)
        !            85: SAX.elementDecl(grade, 3, ...)
        !            86: SAX.comment( 
        !            87:        The Jouyou Kanji grade level. 1 through 6 indicate the grade in which
        !            88:        the kanji is taught in Japanese schools. 8 indicates it is one of the
        !            89:        remaining Jouyou Kanji to be learned in junior high school, and 9 
        !            90:        indicates it is a Jinmeiyou (for use in names) kanji. [G]
        !            91:        )
        !            92: SAX.elementDecl(stroke_count, 3, ...)
        !            93: SAX.comment( 
        !            94:        The stroke count of the kanji, including the radical. If more than 
        !            95:        one, the first is considered the accepted count, while subsequent ones 
        !            96:        are common miscounts. (See Appendix E. of the KANJIDIC documentation
        !            97:        for some of the rules applied when counting strokes in some of the 
        !            98:        radicals.) [S]
        !            99:        )
        !           100: SAX.elementDecl(variant, 3, ...)
        !           101: SAX.comment( 
        !           102:        A cross-reference code to another kanji, usually regarded as a variant.
        !           103:        The type of cross-reference is given in the var_type attribute.
        !           104:        )
        !           105: SAX.attributeDecl(variant, var_type, 1, 2, NULL, ...)
        !           106: SAX.comment( 
        !           107:        The var_type attribute indicates the type of variant code. The current
        !           108:        values are: 
        !           109:                jis208 - in JIS X 0208 - kuten coding
        !           110:                jis212 - in JIS X 0212 - kuten coding
        !           111:                jis213 - in JIS X 0213 - kuten coding
        !           112:                deroo - De Roo number - numeric
        !           113:                njecd - Halpern NJECD index number - numeric
        !           114:                s_h - The Kanji Dictionary (Spahn & Hadamitzky) - descriptor
        !           115:                nelson - "Classic" Nelson - numeric
        !           116:                oneill - Japanese Names (O'Neill) - numeric
        !           117:        )
        !           118: SAX.elementDecl(freq, 3, ...)
        !           119: SAX.comment( 
        !           120:        A frequency-of-use ranking. The 2,500 most-used characters have a 
        !           121:        ranking; those characters that lack this field are not ranked. The 
        !           122:        frequency is a number from 1 to 2,500 that expresses the relative 
        !           123:        frequency of occurrence of a character in modern Japanese. This is
        !           124:        based on a survey in newspapers, so it is biassed towards kanji
        !           125:        used in newspaper articles. The discrimination between the less
        !           126:        frequently used kanji is not strong.
        !           127:        )
        !           128: SAX.elementDecl(rad_name, 3, ...)
        !           129: SAX.comment( 
        !           130:        When the kanji is itself a radical and has a name, this element
        !           131:        contains the name (in hiragana.) [T2]
        !           132:        )
        !           133: SAX.elementDecl(dic_number, 4, ...)
        !           134: SAX.comment( 
        !           135:        This element contains the index numbers and similar unstructured
        !           136:        information such as page numbers in a number of published dictionaries,
        !           137:        and instructional books on kanji.
        !           138:        )
        !           139: SAX.elementDecl(dic_ref, 3, ...)
        !           140: SAX.comment( 
        !           141:        Each dic_ref contains an index number. The particular dictionary,
        !           142:        etc. is defined by the dr_type attribute.
        !           143:        )
        !           144: SAX.attributeDecl(dic_ref, dr_type, 1, 2, NULL, ...)
        !           145: SAX.comment( 
        !           146:        The dr_type defines the dictionary or reference book, etc. to which
        !           147:        dic_ref element applies. The initial allocation is:
        !           148:          nelson_c - "Modern Reader's Japanese-English Character Dictionary",  
        !           149:                edited by Andrew Nelson (now published as the "Classic" 
        !           150:                Nelson).
        !           151:          nelson_n - "The New Nelson Japanese-English Character Dictionary", 
        !           152:                edited by John Haig.
        !           153:          halpern_njecd - "New Japanese-English Character Dictionary", 
        !           154:                edited by Jack Halpern.
        !           155:          halpern_kkld - "Kanji Learners Dictionary" (Kodansha) edited by 
        !           156:                Jack Halpern.
        !           157:          heisig - "Remembering The  Kanji"  by  James Heisig.
        !           158:          gakken - "A  New Dictionary of Kanji Usage" (Gakken)
        !           159:          oneill_names - "Japanese Names", by P.G. O'Neill. 
        !           160:          oneill_kk - "Essential Kanji" by P.G. O'Neill.
        !           161:          moro - "Daikanwajiten" compiled by Morohashi. For some kanji two
        !           162:                additional attributes are used: m_vol:  the volume of the
        !           163:                dictionary in which the kanji is found, and m_page: the page
        !           164:                number in the volume.
        !           165:          henshall - "A Guide To Remembering Japanese Characters" by
        !           166:                Kenneth G.  Henshall.
        !           167:          sh_kk - "Kanji and Kana" by Spahn and Hadamitzky.
        !           168:          sakade - "A Guide To Reading and Writing Japanese" edited by
        !           169:                Florence Sakade.
        !           170:          henshall3 - "A Guide To Reading and Writing Japanese" 3rd
        !           171:                edition, edited by Henshall, Seeley and De Groot.
        !           172:          tutt_cards - Tuttle Kanji Cards, compiled by Alexander Kask.
        !           173:          crowley - "The Kanji Way to Japanese Language Power" by
        !           174:                Dale Crowley.
        !           175:          kanji_in_context - "Kanji in Context" by Nishiguchi and Kono.
        !           176:          busy_people - "Japanese For Busy People" vols I-III, published
        !           177:                by the AJLT. The codes are the volume.chapter.
        !           178:          kodansha_compact - the "Kodansha Compact Kanji Guide".
        !           179:        )
        !           180: SAX.attributeDecl(dic_ref, m_vol, 1, 3, NULL, ...)
        !           181: SAX.comment( 
        !           182:        See above under "moro".
        !           183:        )
        !           184: SAX.attributeDecl(dic_ref, m_page, 1, 3, NULL, ...)
        !           185: SAX.comment( 
        !           186:        See above under "moro".
        !           187:        )
        !           188: SAX.elementDecl(query_code, 4, ...)
        !           189: SAX.comment( 
        !           190:        These codes contain information relating to the glyph, and can be used
        !           191:        for finding a required kanji. The type of code is defined by the
        !           192:        qc_type attribute.
        !           193:        )
        !           194: SAX.elementDecl(q_code, 3, ...)
        !           195: SAX.comment(
        !           196:        The q_code contains the actual query-code value, according to the
        !           197:        qc_type attribute.
        !           198:        )
        !           199: SAX.attributeDecl(q_code, qc_type, 1, 2, NULL, ...)
        !           200: SAX.comment( 
        !           201:        The q_code attribute defines the type of query code. The current values
        !           202:        are:
        !           203:          skip -  Halpern's SKIP (System  of  Kanji  Indexing  by  Patterns) 
        !           204:                code. The  format is n-nn-nn.  See the KANJIDIC  documentation 
        !           205:                for  a description of the code and restrictions on  the 
        !           206:                commercial  use  of this data. [P]
        !           207: 
        !           208:          sh_desc - the descriptor codes for The Kanji Dictionary (Tuttle 
        !           209:                1996) by Spahn and Hadamitzky. They are in the form nxnn.n,  
        !           210:                e.g.  3k11.2, where the  kanji has 3 strokes in the 
        !           211:                identifying radical, it is radical "k" in the SH 
        !           212:                classification system, there are 11 other strokes, and it is 
        !           213:                the 2nd kanji in the 3k11 sequence. (I am very grateful to 
        !           214:                Mark Spahn for providing the list of these descriptor codes 
        !           215:                for the kanji in this file.) [I]
        !           216:          four_corner - the "Four Corner" code for the kanji. This is a code 
        !           217:                invented by Wang Chen in 1928. See the KANJIDIC documentation 
        !           218:                for  an overview of  the Four Corner System. [Q]
        !           219: 
        !           220:          deroo - the codes developed by the late Father Joseph De Roo, and 
        !           221:                published in  his book "2001 Kanji" (Bojinsha). Fr De Roo 
        !           222:                gave his permission for these codes to be included. [DR]
        !           223:          misclass - a possible misclassification of the kanji according
        !           224:                to one of the code types. (See the "Z" codes in the KANJIDIC
        !           225:                documentation for more details.)
        !           226:          
        !           227:        )
        !           228: SAX.elementDecl(reading_meaning, 4, ...)
        !           229: SAX.comment( 
        !           230:        The readings for the kanji in several languages, and the meanings, also
        !           231:        in several languages. The readings and meanings are grouped to enable
        !           232:        the handling of the situation where the meaning is differentiated by 
        !           233:        reading. [T1]
        !           234:        )
        !           235: SAX.elementDecl(nanori, 3, ...)
        !           236: SAX.comment( 
        !           237:        Japanese readings that are now only associated with names.
        !           238:        )
        !           239: SAX.elementDecl(rmgroup, 4, ...)
        !           240: SAX.elementDecl(reading, 3, ...)
        !           241: SAX.comment( 
        !           242:        The reading element contains the reading or pronunciation
        !           243:        of the kanji.
        !           244:        )
        !           245: SAX.attributeDecl(reading, r_type, 1, 2, NULL, ...)
        !           246: SAX.comment( 
        !           247:        The r_type attribute defines the type of reading in the reading
        !           248:        element. The current values are:
        !           249:          pinyin - the modern PinYin romanization of the Chinese reading 
        !           250:                of the kanji. The tones are represented by a concluding 
        !           251:                digit. [Y]
        !           252:          korean_r - the romanized form of the Korean reading(s) of the 
        !           253:                kanji.  The readings are in the (Republic of Korea) Ministry 
        !           254:                of Education style of romanization. [W]
        !           255:          korean_h - the Korean reading(s) of the kanji in hangul.
        !           256:          ja_on - the "on" Japanese reading of the kanji, in katakana. A
        !           257:                second attribute r_status, if present, will indicate with
        !           258:                a value of "jy" whether the reading is approved for a
        !           259:                "Jouyou kanji".
        !           260:          ja_kun - the "kun" Japanese reading of the kanji, in hiragana. 
        !           261:                Where relevant the okurigana is also included separated by a 
        !           262:                ".". Readings associated with prefixes and suffixes are 
        !           263:                marked with a "-". A second attribute r_status, if present, 
        !           264:                will indicate with a value of "jy" whether the reading is 
        !           265:                approved for a "Jouyou kanji".
        !           266:        )
        !           267: SAX.attributeDecl(reading, r_status, 1, 3, NULL, ...)
        !           268: SAX.comment( 
        !           269:        See under ja_on and ja_kun above.
        !           270:        )
        !           271: SAX.elementDecl(meaning, 3, ...)
        !           272: SAX.comment( 
        !           273:        The meaning associated with the kanji.
        !           274:        )
        !           275: SAX.attributeDecl(meaning, m_lang, 1, 3, NULL, ...)
        !           276: SAX.comment( 
        !           277:        The m_lang attribute defines the target language of the meaning. It 
        !           278:        will be coded using the two-letter language code from the ISO 639 
        !           279:        standard. When absent, the value "en" (i.e. English) is implied. [{}]
        !           280:        )
        !           281: SAX.externalSubset(kanjidic2, , )
        !           282: SAX.startElement(kanjidic2)
        !           283: SAX.characters(
        !           284: , 1)
        !           285: SAX.endElement(kanjidic2)
        !           286: SAX.endDocument()

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>