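Adjust the CJK Ideograph range to Unicode 4.1 (upper bound 0x9FA5 -> 0x9FBB),
and move the out-of-range (code >= 0x110000) and unassigned-code-point checks
ahead of the Hangul syllable handling.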
diff --git a/Modules/unicodedata.c b/Modules/unicodedata.c
index 5a4378b..9eda653 100644
--- a/Modules/unicodedata.c
+++ b/Modules/unicodedata.c
@@ -803,7 +803,7 @@
{
return (
(0x3400 <= code && code <= 0x4DB5) || /* CJK Ideograph Extension A */
- (0x4E00 <= code && code <= 0x9FA5) || /* CJK Ideograph */
+ (0x4E00 <= code && code <= 0x9FBB) || /* CJK Ideograph */
(0x20000 <= code && code <= 0x2A6D6));/* CJK Ideograph Extension B */
}
@@ -815,6 +815,17 @@
int word;
unsigned char* w;
+ if (code >= 0x110000)
+ return 0;
+
+ if (self) {
+ const change_record *old = get_old_record(self, code);
+ if (old->category_changed == 0) {
+ /* unassigned */
+ return 0;
+ }
+ }
+
if (SBase <= code && code < SBase+SCount) {
/* Hangul syllable. */
int SIndex = code - SBase;
@@ -845,18 +856,6 @@
return 1;
}
- if (code >= 0x110000)
- return 0;
-
- if (self) {
- const change_record *old = get_old_record(self, code);
- if (old->category_changed == 0) {
- /* unassigned */
- return 0;
- }
- }
-
-
/* get offset into phrasebook */
offset = phrasebook_offset1[(code>>phrasebook_shift)];
offset = phrasebook_offset2[(offset<<phrasebook_shift) +