<?xml version="1.0"?>
<!-- test/intsubset2.xml - platform/external/libxml2 - Gitiles
     (title line kept from the top of the original text; the XML declaration
     has to be the very first thing in the document, so it now comes first) -->
 <!DOCTYPE kanjidic2 [
 	<!-- Version 1.3
 	This is the DTD of the XML-format kanji file combining information from
 	the KANJIDIC and KANJD212 files. It is intended to be largely self-
 	documenting, with each field being accompanied by an explanatory
 	comment.

 	The file covers the following kanji:
 	(a) the 6,355 kanji from JIS X 0208;
 	(b) the 5,801 kanji from JIS X 0212;
 	(c) the 3,625 kanji from JIS X 0213 as follows:
 		(i) the 2,741 kanji which are also in JIS X 0212 have
 		JIS X 0213 code-points (kuten) added to the existing entry;
 		(ii) the 884 "new" kanji have new entries.

 	At the end of the explanation for a number of fields there is a tag
 	with the format [N]. This indicates the leading letter(s) of the
 	equivalent field in the KANJIDIC and KANJD212 files.

 	The KANJIDIC documentation should also be read for additional
 	information about the information in the file.
 	-->
 <!ELEMENT kanjidic2 (header,character*)>
 <!ELEMENT header (file_version,database_version,date_of_creation)>
 <!--
 	The single header element will contain identification information
 	about the version of the file
 	-->
 <!ELEMENT file_version (#PCDATA)>
 <!--
 	This field denotes the version of kanjidic2 structure, as more
 	than one version may exist.
 	-->
 <!ELEMENT database_version (#PCDATA)>
 <!--
 	The version of the file, in the format YYYY-NN, where NN will be
 	a number starting with 01 for the first version released in a
 	calendar year, then increasing for each version in that year.
 	-->
 <!ELEMENT date_of_creation (#PCDATA)>
 <!--
 	The date the file was created in international format (YYYY-MM-DD).
 	-->
 <!ELEMENT character (literal, codepoint, radical, misc, dic_number?, query_code?, reading_meaning?, nanori?)*>
 <!--
 	Content of a character entry: literal, codepoint, radical and misc in
 	that order, then the optional dic_number, query_code, reading_meaning
 	and nanori children. The trailing * lets the whole sequence occur
 	zero or more times.
 	-->
 <!ELEMENT literal (#PCDATA)>
 <!--
 	The character itself in UTF8 coding.
 	-->
 <!ELEMENT codepoint (cp_value+)>
 	<!--
 	The codepoint element states the code of the character in the various
 	character set standards.
 	-->
 <!ELEMENT cp_value (#PCDATA)>
 	<!--
 	The cp_value contains the codepoint of the character in a particular
 	standard. The standard will be identified in the cp_type attribute.
 	-->
 <!ATTLIST cp_value cp_type CDATA #REQUIRED>
 	<!--
 	The cp_type attribute states the coding standard applying to the
 	element. The values assigned so far are:
 		jis208 - JIS X 0208-1997 - kuten coding (nn-nn)
 		jis212 - JIS X 0212-1990 - kuten coding (nn-nn)
 		jis213 - JIS X 0213-2000 - kuten coding (p-nn-nn)
 		ucs - Unicode 4.0 - hex coding (4 or 5 hexadecimal digits)
 	-->
 <!ELEMENT radical (rad_value+)>
 <!ELEMENT rad_value (#PCDATA)>
 	<!--
 	The radical number, in the range 1 to 214. The particular
 	classification type is stated in the rad_type attribute.
 	-->
 <!ATTLIST rad_value rad_type CDATA #REQUIRED>
 	<!--
 	The rad_type attribute states the type of radical classification.
 		classical - as recorded in the KangXi Zidian.
 		nelson - as used in the Nelson "Modern Japanese-English
 		Character Dictionary" (i.e. the Classic, not the New Nelson).
 		This will only be used where Nelson reclassified the kanji.
 	-->
 <!ELEMENT misc (grade?, stroke_count+, variant*, freq*, rad_name*)>
 <!ELEMENT grade (#PCDATA)>
 	<!--
 	The Jouyou Kanji grade level. 1 through 6 indicate the grade in which
 	the kanji is taught in Japanese schools. 8 indicates it is one of the
 	remaining Jouyou Kanji to be learned in junior high school, and 9
 	indicates it is a Jinmeiyou (for use in names) kanji. [G]
 	-->
 <!ELEMENT stroke_count (#PCDATA)>
 	<!--
 	The stroke count of the kanji, including the radical. If more than
 	one, the first is considered the accepted count, while subsequent ones
 	are common miscounts. (See Appendix E. of the KANJIDIC documentation
 	for some of the rules applied when counting strokes in some of the
 	radicals.) [S]
 	-->
 <!ELEMENT variant (#PCDATA)>
 	<!--
 	A cross-reference code to another kanji, usually regarded as a variant.
 	The type of cross-reference is given in the var_type attribute.
 	-->
 <!ATTLIST variant var_type CDATA #REQUIRED>
 	<!--
 	The var_type attribute indicates the type of variant code. The current
 	values are:
 		jis208 - in JIS X 0208 - kuten coding
 		jis212 - in JIS X 0212 - kuten coding
 		jis213 - in JIS X 0213 - kuten coding
 		deroo - De Roo number - numeric
 		njecd - Halpern NJECD index number - numeric
 		s_h - The Kanji Dictionary (Spahn & Hadamitzky) - descriptor
 		nelson - "Classic" Nelson - numeric
 		oneill - Japanese Names (O'Neill) - numeric
 	-->
 <!ELEMENT freq (#PCDATA)>
 	<!--
 	A frequency-of-use ranking. The 2,500 most-used characters have a
 	ranking; those characters that lack this field are not ranked. The
 	frequency is a number from 1 to 2,500 that expresses the relative
 	frequency of occurrence of a character in modern Japanese. This is
 	based on a survey in newspapers, so it is biassed towards kanji
 	used in newspaper articles. The discrimination between the less
 	frequently used kanji is not strong.
 	-->
 <!ELEMENT rad_name (#PCDATA)>
 	<!--
 	When the kanji is itself a radical and has a name, this element
 	contains the name (in hiragana.) [T2]
 	-->
 <!ELEMENT dic_number (dic_ref+)>
 	<!--
 	This element contains the index numbers and similar unstructured
 	information such as page numbers in a number of published dictionaries,
 	and instructional books on kanji.
 	-->
 <!ELEMENT dic_ref (#PCDATA)>
 	<!--
 	Each dic_ref contains an index number. The particular dictionary,
 	etc. is defined by the dr_type attribute.
 	-->
 <!ATTLIST dic_ref dr_type CDATA #REQUIRED>
 	<!--
 	The dr_type attribute defines the dictionary or reference book, etc. to
 	which the dic_ref element applies. The initial allocation is:
 	  nelson_c - "Modern Reader's Japanese-English Character Dictionary",
 	  	edited by Andrew Nelson (now published as the "Classic"
 	  	Nelson).
 	  nelson_n - "The New Nelson Japanese-English Character Dictionary",
 	  	edited by John Haig.
 	  halpern_njecd - "New Japanese-English Character Dictionary",
 	  	edited by Jack Halpern.
 	  halpern_kkld - "Kanji Learners Dictionary" (Kodansha) edited by
 	  	Jack Halpern.
 	  heisig - "Remembering The  Kanji"  by  James Heisig.
 	  gakken - "A  New Dictionary of Kanji Usage" (Gakken)
 	  oneill_names - "Japanese Names", by P.G. O'Neill.
 	  oneill_kk - "Essential Kanji" by P.G. O'Neill.
 	  moro - "Daikanwajiten" compiled by Morohashi. For some kanji two
 	  	additional attributes are used: m_vol:  the volume of the
 	  	dictionary in which the kanji is found, and m_page: the page
 	  	number in the volume.
 	  henshall - "A Guide To Remembering Japanese Characters" by
 	  	Kenneth G.  Henshall.
 	  sh_kk - "Kanji and Kana" by Spahn and Hadamitzky.
 	  sakade - "A Guide To Reading and Writing Japanese" edited by
 	  	Florence Sakade.
 	  henshall3 - "A Guide To Reading and Writing Japanese" 3rd
 		edition, edited by Henshall, Seeley and De Groot.
 	  tutt_cards - Tuttle Kanji Cards, compiled by Alexander Kask.
 	  crowley - "The Kanji Way to Japanese Language Power" by
 	  	Dale Crowley.
 	  kanji_in_context - "Kanji in Context" by Nishiguchi and Kono.
 	  busy_people - "Japanese For Busy People" vols I-III, published
 		by the AJLT. The codes are the volume.chapter.
 	  kodansha_compact - the "Kodansha Compact Kanji Guide".
 	-->
 <!ATTLIST dic_ref m_vol CDATA #IMPLIED>
 	<!--
 	See above under "moro".
 	-->
 <!ATTLIST dic_ref m_page CDATA #IMPLIED>
 	<!--
 	See above under "moro".
 	-->
 <!ELEMENT query_code (q_code+)>
 	<!--
 	These codes contain information relating to the glyph, and can be used
 	for finding a required kanji. The type of code is defined by the
 	qc_type attribute.
 	-->
 <!ELEMENT q_code (#PCDATA)>
 	<!--
 	The q_code contains the actual query-code value, according to the
 	qc_type attribute.
 	-->
 <!ATTLIST q_code qc_type CDATA #REQUIRED>
 	<!--
 	The qc_type attribute defines the type of query code. The current values
 	are:
 	  skip -  Halpern's SKIP (System  of  Kanji  Indexing  by  Patterns)
 	  	code. The  format is n-nn-nn.  See the KANJIDIC  documentation
 	  	for  a description of the code and restrictions on  the
 	  	commercial  use  of this data. [P]

 	  sh_desc - the descriptor codes for The Kanji Dictionary (Tuttle
 	  	1996) by Spahn and Hadamitzky. They are in the form nxnn.n,
 	  	e.g.  3k11.2, where the  kanji has 3 strokes in the
 	  	identifying radical, it is radical "k" in the SH
 	  	classification system, there are 11 other strokes, and it is
 	  	the 2nd kanji in the 3k11 sequence. (I am very grateful to
 	  	Mark Spahn for providing the list of these descriptor codes
 	  	for the kanji in this file.) [I]
 	  four_corner - the "Four Corner" code for the kanji. This is a code
 	  	invented by Wang Chen in 1928. See the KANJIDIC documentation
 	  	for  an overview of  the Four Corner System. [Q]

 	  deroo - the codes developed by the late Father Joseph De Roo, and
 	  	published in  his book "2001 Kanji" (Bojinsha). Fr De Roo
 	  	gave his permission for these codes to be included. [DR]
 	  misclass - a possible misclassification of the kanji according
 		to one of the code types. (See the "Z" codes in the KANJIDIC
 		documentation for more details.)

 	-->
 <!ELEMENT reading_meaning (rmgroup*, nanori*)>
 	<!--
 	The readings for the kanji in several languages, and the meanings, also
 	in several languages. The readings and meanings are grouped to enable
 	the handling of the situation where the meaning is differentiated by
 	reading. [T1]
 	-->
 <!ELEMENT nanori (#PCDATA)>
 	<!--
 	Japanese readings that are now only associated with names.
 	-->
 <!ELEMENT rmgroup (reading*, meaning*)>
 <!ELEMENT reading (#PCDATA)>
 	<!--
 	The reading element contains the reading or pronunciation
 	of the kanji.
 	-->
 <!ATTLIST reading r_type CDATA #REQUIRED>
 	<!--
 	The r_type attribute defines the type of reading in the reading
 	element. The current values are:
 	  pinyin - the modern PinYin romanization of the Chinese reading
 	  	of the kanji. The tones are represented by a concluding
 	  	digit. [Y]
 	  korean_r - the romanized form of the Korean reading(s) of the
 	  	kanji.  The readings are in the (Republic of Korea) Ministry
 	  	of Education style of romanization. [W]
 	  korean_h - the Korean reading(s) of the kanji in hangul.
 	  ja_on - the "on" Japanese reading of the kanji, in katakana. A
 	  	second attribute r_status, if present, will indicate with
 	  	a value of "jy" whether the reading is approved for a
 	  	"Jouyou kanji".
 	  ja_kun - the "kun" Japanese reading of the kanji, in hiragana.
 	  	Where relevant the okurigana is also included separated by a
 	  	".". Readings associated with prefixes and suffixes are
 	  	marked with a "-". A second attribute r_status, if present,
 	  	will indicate with a value of "jy" whether the reading is
 	  	approved for a "Jouyou kanji".
 	-->
 <!ATTLIST reading r_status CDATA #IMPLIED>
 	<!--
 	See under ja_on and ja_kun above.
 	-->
 <!ELEMENT meaning (#PCDATA)>
 	<!--
 	The meaning associated with the kanji.
 	-->
 <!ATTLIST meaning m_lang CDATA #IMPLIED>
 	<!--
 	The m_lang attribute defines the target language of the meaning. It
 	will be coded using the two-letter language code from the ISO 639
 	standard. When absent, the value "en" (i.e. English) is implied. [{}]
 	-->
 ] >
 <kanjidic2>
 </kanjidic2>
	<?xml version="1.0"?>
	<!DOCTYPE kanjidic2 [
	<!-- Version 1.3
	This is the DTD of the XML-format kanji file combining information from
	the KANJIDIC and KANJD212 files. It is intended to be largely self-
	documenting, with each field being accompanied by an explanatory
	comment.

	The file covers the following kanji:
	(a) the 6,355 kanji from JIS X 0208;
	(b) the 5,801 kanji from JIS X 0212;
	(c) the 3,625 kanji from JIS X 0213 as follows:
	(i) the 2,741 kanji which are also in JIS X 0212 have
	JIS X 0213 code-points (kuten) added to the existing entry;
	(ii) the 884 "new" kanji have new entries.

	At the end of the explanation for a number of fields there is a tag
	with the format [N]. This indicates the leading letter(s) of the
	equivalent field in the KANJIDIC and KANJD212 files.

	The KANJIDIC documentation should also be read for additional
	information about the information in the file.
	-->
	<!ELEMENT kanjidic2 (header,character*)>
	<!ELEMENT header (file_version,database_version,date_of_creation)>
	<!--
	The single header element will contain identification information
	about the version of the file
	-->
	<!ELEMENT file_version (#PCDATA)>
	<!--
	This field denotes the version of kanjidic2 structure, as more
	than one version may exist.
	-->
	<!ELEMENT database_version (#PCDATA)>
	<!--
	The version of the file, in the format YYYY-NN, where NN will be
	a number starting with 01 for the first version released in a
	calendar year, then increasing for each version in that year.
	-->
	<!ELEMENT date_of_creation (#PCDATA)>
	<!--
	The date the file was created in international format (YYYY-MM-DD).
	-->
	<!ELEMENT character (literal, codepoint, radical, misc, dic_number?, query_code?, reading_meaning?, nanori?)*>
	<!--
	Content of a character entry: literal, codepoint, radical and misc in
	that order, then the optional dic_number, query_code, reading_meaning
	and nanori children. The trailing * lets the whole sequence occur
	zero or more times.
	-->
	<!ELEMENT literal (#PCDATA)>
	<!--
	The character itself in UTF8 coding.
	-->
	<!ELEMENT codepoint (cp_value+)>
	<!--
	The codepoint element states the code of the character in the various
	character set standards.
	-->
	<!ELEMENT cp_value (#PCDATA)>
	<!--
	The cp_value contains the codepoint of the character in a particular
	standard. The standard will be identified in the cp_type attribute.
	-->
	<!ATTLIST cp_value cp_type CDATA #REQUIRED>
	<!--
	The cp_type attribute states the coding standard applying to the
	element. The values assigned so far are:
	jis208 - JIS X 0208-1997 - kuten coding (nn-nn)
	jis212 - JIS X 0212-1990 - kuten coding (nn-nn)
	jis213 - JIS X 0213-2000 - kuten coding (p-nn-nn)
	ucs - Unicode 4.0 - hex coding (4 or 5 hexadecimal digits)
	-->
	<!ELEMENT radical (rad_value+)>
	<!ELEMENT rad_value (#PCDATA)>
	<!--
	The radical number, in the range 1 to 214. The particular
	classification type is stated in the rad_type attribute.
	-->
	<!ATTLIST rad_value rad_type CDATA #REQUIRED>
	<!--
	The rad_type attribute states the type of radical classification.
	classical - as recorded in the KangXi Zidian.
	nelson - as used in the Nelson "Modern Japanese-English
	Character Dictionary" (i.e. the Classic, not the New Nelson).
	This will only be used where Nelson reclassified the kanji.
	-->
	<!ELEMENT misc (grade?, stroke_count+, variant*, freq*, rad_name*)>
	<!--
	Miscellaneous information about the kanji. Only stroke_count is
	mandatory (one or more). grade is optional; variant, freq and
	rad_name may each be absent or repeated. In particular, characters
	that are not ranked by frequency simply have no freq element.
	-->
	<!ELEMENT grade (#PCDATA)>
	<!--
	The Jouyou Kanji grade level. 1 through 6 indicate the grade in which
	the kanji is taught in Japanese schools. 8 indicates it is one of the
	remaining Jouyou Kanji to be learned in junior high school, and 9
	indicates it is a Jinmeiyou (for use in names) kanji. [G]
	-->
	<!ELEMENT stroke_count (#PCDATA)>
	<!--
	The stroke count of the kanji, including the radical. If more than
	one, the first is considered the accepted count, while subsequent ones
	are common miscounts. (See Appendix E. of the KANJIDIC documentation
	for some of the rules applied when counting strokes in some of the
	radicals.) [S]
	-->
	<!ELEMENT variant (#PCDATA)>
	<!--
	A cross-reference code to another kanji, usually regarded as a variant.
	The type of cross-reference is given in the var_type attribute.
	-->
	<!ATTLIST variant var_type CDATA #REQUIRED>
	<!--
	The var_type attribute indicates the type of variant code. The current
	values are:
	jis208 - in JIS X 0208 - kuten coding
	jis212 - in JIS X 0212 - kuten coding
	jis213 - in JIS X 0213 - kuten coding
	deroo - De Roo number - numeric
	njecd - Halpern NJECD index number - numeric
	s_h - The Kanji Dictionary (Spahn & Hadamitzky) - descriptor
	nelson - "Classic" Nelson - numeric
	oneill - Japanese Names (O'Neill) - numeric
	-->
	<!ELEMENT freq (#PCDATA)>
	<!--
	A frequency-of-use ranking. The 2,500 most-used characters have a
	ranking; those characters that lack this field are not ranked. The
	frequency is a number from 1 to 2,500 that expresses the relative
	frequency of occurrence of a character in modern Japanese. This is
	based on a survey in newspapers, so it is biassed towards kanji
	used in newspaper articles. The discrimination between the less
	frequently used kanji is not strong.
	-->
	<!ELEMENT rad_name (#PCDATA)>
	<!--
	When the kanji is itself a radical and has a name, this element
	contains the name (in hiragana.) [T2]
	-->
	<!ELEMENT dic_number (dic_ref+)>
	<!--
	This element contains the index numbers and similar unstructured
	information such as page numbers in a number of published dictionaries,
	and instructional books on kanji.
	-->
	<!ELEMENT dic_ref (#PCDATA)>
	<!--
	Each dic_ref contains an index number. The particular dictionary,
	etc. is defined by the dr_type attribute.
	-->
	<!ATTLIST dic_ref dr_type CDATA #REQUIRED>
	<!--
	The dr_type attribute defines the dictionary or reference book, etc. to
	which the dic_ref element applies. The initial allocation is:
	nelson_c - "Modern Reader's Japanese-English Character Dictionary",
	edited by Andrew Nelson (now published as the "Classic"
	Nelson).
	nelson_n - "The New Nelson Japanese-English Character Dictionary",
	edited by John Haig.
	halpern_njecd - "New Japanese-English Character Dictionary",
	edited by Jack Halpern.
	halpern_kkld - "Kanji Learners Dictionary" (Kodansha) edited by
	Jack Halpern.
	heisig - "Remembering The Kanji" by James Heisig.
	gakken - "A New Dictionary of Kanji Usage" (Gakken)
	oneill_names - "Japanese Names", by P.G. O'Neill.
	oneill_kk - "Essential Kanji" by P.G. O'Neill.
	moro - "Daikanwajiten" compiled by Morohashi. For some kanji two
	additional attributes are used: m_vol: the volume of the
	dictionary in which the kanji is found, and m_page: the page
	number in the volume.
	henshall - "A Guide To Remembering Japanese Characters" by
	Kenneth G. Henshall.
	sh_kk - "Kanji and Kana" by Spahn and Hadamitzky.
	sakade - "A Guide To Reading and Writing Japanese" edited by
	Florence Sakade.
	henshall3 - "A Guide To Reading and Writing Japanese" 3rd
	edition, edited by Henshall, Seeley and De Groot.
	tutt_cards - Tuttle Kanji Cards, compiled by Alexander Kask.
	crowley - "The Kanji Way to Japanese Language Power" by
	Dale Crowley.
	kanji_in_context - "Kanji in Context" by Nishiguchi and Kono.
	busy_people - "Japanese For Busy People" vols I-III, published
	by the AJLT. The codes are the volume.chapter.
	kodansha_compact - the "Kodansha Compact Kanji Guide".
	-->
	<!ATTLIST dic_ref m_vol CDATA #IMPLIED>
	<!--
	See above under "moro".
	-->
	<!ATTLIST dic_ref m_page CDATA #IMPLIED>
	<!--
	See above under "moro".
	-->
	<!ELEMENT query_code (q_code+)>
	<!--
	These codes contain information relating to the glyph, and can be used
	for finding a required kanji. The type of code is defined by the
	qc_type attribute.
	-->
	<!ELEMENT q_code (#PCDATA)>
	<!--
	The q_code contains the actual query-code value, according to the
	qc_type attribute.
	-->
	<!ATTLIST q_code qc_type CDATA #REQUIRED>
	<!--
	The qc_type attribute defines the type of query code. The current values
	are:
	skip - Halpern's SKIP (System of Kanji Indexing by Patterns)
	code. The format is n-nn-nn. See the KANJIDIC documentation
	for a description of the code and restrictions on the
	commercial use of this data. [P]

	sh_desc - the descriptor codes for The Kanji Dictionary (Tuttle
	1996) by Spahn and Hadamitzky. They are in the form nxnn.n,
	e.g. 3k11.2, where the kanji has 3 strokes in the
	identifying radical, it is radical "k" in the SH
	classification system, there are 11 other strokes, and it is
	the 2nd kanji in the 3k11 sequence. (I am very grateful to
	Mark Spahn for providing the list of these descriptor codes
	for the kanji in this file.) [I]
	four_corner - the "Four Corner" code for the kanji. This is a code
	invented by Wang Chen in 1928. See the KANJIDIC documentation
	for an overview of the Four Corner System. [Q]

	deroo - the codes developed by the late Father Joseph De Roo, and
	published in his book "2001 Kanji" (Bojinsha). Fr De Roo
	gave his permission for these codes to be included. [DR]
	misclass - a possible misclassification of the kanji according
	to one of the code types. (See the "Z" codes in the KANJIDIC
	documentation for more details.)

	-->
	<!ELEMENT reading_meaning (rmgroup*, nanori*)>
	<!--
	The readings for the kanji in several languages, and the meanings, also
	in several languages. The readings and meanings are grouped to enable
	the handling of the situation where the meaning is differentiated by
	reading, so any number of rmgroup elements (including none) may be
	present, followed by any number of nanori readings. [T1]
	-->
	<!ELEMENT nanori (#PCDATA)>
	<!--
	Japanese readings that are now only associated with names.
	-->
	<!ELEMENT rmgroup (reading*, meaning*)>
	<!--
	One group of readings together with the meanings that go with them.
	Both lists are repeatable and may be empty: readings come in several
	types (see the r_type attribute) and meanings in several languages
	(see the m_lang attribute). All readings precede all meanings.
	-->
	<!ELEMENT reading (#PCDATA)>
	<!--
	The reading element contains the reading or pronunciation
	of the kanji.
	-->
	<!ATTLIST reading r_type CDATA #REQUIRED>
	<!--
	The r_type attribute defines the type of reading in the reading
	element. The current values are:
	pinyin - the modern PinYin romanization of the Chinese reading
	of the kanji. The tones are represented by a concluding
	digit. [Y]
	korean_r - the romanized form of the Korean reading(s) of the
	kanji. The readings are in the (Republic of Korea) Ministry
	of Education style of romanization. [W]
	korean_h - the Korean reading(s) of the kanji in hangul.
	ja_on - the "on" Japanese reading of the kanji, in katakana. A
	second attribute r_status, if present, will indicate with
	a value of "jy" whether the reading is approved for a
	"Jouyou kanji".
	ja_kun - the "kun" Japanese reading of the kanji, in hiragana.
	Where relevant the okurigana is also included separated by a
	".". Readings associated with prefixes and suffixes are
	marked with a "-". A second attribute r_status, if present,
	will indicate with a value of "jy" whether the reading is
	approved for a "Jouyou kanji".
	-->
	<!ATTLIST reading r_status CDATA #IMPLIED>
	<!--
	See under ja_on and ja_kun above.
	-->
	<!ELEMENT meaning (#PCDATA)>
	<!--
	The meaning associated with the kanji.
	-->
	<!ATTLIST meaning m_lang CDATA #IMPLIED>
	<!--
	The m_lang attribute defines the target language of the meaning. It
	will be coded using the two-letter language code from the ISO 639
	standard. When absent, the value "en" (i.e. English) is implied. [{}]
	-->
	] >
	<kanjidic2>
	</kanjidic2>