From 276253ee5d938c7e91946e3df7d73b3a768fe0d0 Mon Sep 17 00:00:00 2001 From: tomo Date: Tue, 17 Dec 2002 19:55:14 +0000 Subject: [PATCH] Sync with r21-2-19-utf-2000-0_10-0. --- src/ChangeLog | 73 +++++++++++++ src/char-ucs.h | 287 +++++++++++++++++++++++++++------------------------- src/mule-charset.c | 277 +++++++++++++++++++++----------------------------- src/text-coding.c | 36 +++++-- 4 files changed, 365 insertions(+), 308 deletions(-) diff --git a/src/ChangeLog b/src/ChangeLog index 8654bc6..5b03b64 100644 --- a/src/ChangeLog +++ b/src/ChangeLog @@ -1,3 +1,76 @@ +1999-10-29 MORIOKA Tomohiko + + * mule-charset.c (Fcharset_name): Define `byte_offset' in + non-UTF-2000 configuration. + +1999-10-29 MORIOKA Tomohiko + + * text-coding.c (char_encode_shift_jis): Use `charset_code_point' + not to use `XCHARSET_ENCODING_TABLE (Vcharset_latin_jisx0201)'. + + * mule-charset.c (mark_charset): `cs->encoding_table' has been + deleted. + (make_charset): Don't use `CHARSET_ENCODING_TABLE(cs)'. + (Fset_charset_mapping_table): Likewise. + + * char-ucs.h (struct Lisp_Charset): Delete `encoding_table'. + (CHARSET_ENCODING_TABLE): Delete. + (XCHARSET_ENCODING_TABLE): Delete. + (charset_code_point): New interface. + +1999-10-29 MORIOKA Tomohiko + + * text-coding.c (char_encode_iso2022): Use `charset_code_point' + instead of `charset_get_byte1' and `charset_get_byte2'. + + * mule-charset.c, char-ucs.h (charset_get_byte1): Deleted. + (charset_get_byte2): Deleted. + +1999-10-28 MORIOKA Tomohiko + + * char-ucs.h (SPLIT_CHAR): New inline function. + (breakup_char_1): Use `SPLIT_CHAR'. + + * mule-charset.c (range_charset_code_point): New function. + (charset_code_point): New function. + + * char-ucs.h (range_charset_code_point): New interface. + (breakup_char_1): Use `range_charset_code_point'. + +1999-10-27 MORIOKA Tomohiko + + * mule-charset.c (Fmake_charset): Delete unused local variable + `code_offset'. + + * char-ucs.h (Vcharacter_attribute_table): New extern variable. 
+ (breakup_char_1): Find a charset and code-point in + `Vcharacter_attribute_table'. + +1999-10-27 MORIOKA Tomohiko + + * mule-charset.c (vars_of_mule_charset): Update `utf-2000-version' + to 0.10 (Yao). + +1999-10-25 MORIOKA Tomohiko + + * mule-charset.c (Vcharacter_attribute_table): New variable. + (Fchar_attribute_alist): New function. + (Fget_char_attribute): New function. + (Fput_char_attribute): New function. + (Fset_charset_mapping_table): Setup `Vcharacter_attribute_table' + too. + (syms_of_mule_charset): Add new function `char-attribute-alist', + `get-char-attribute' and `put-char-attribute'. + (vars_of_mule_charset): Setup `Vcharacter_attribute_table'. + +1999-10-19 MORIOKA Tomohiko + + * mule-charset.c (Fmake_charset): Just use + `get_unallocated_leading_byte'. + + * char-ucs.h (LEADING_BYTE_*): Use ISO-IR numbers for official + sets; don't use final-byte based number for private sets. + 1999-10-12 MORIOKA Tomohiko * mule-charset.c (vars_of_mule_charset): Update `utf-2000-version' diff --git a/src/char-ucs.h b/src/char-ucs.h index bf23902..e9b9e22 100644 --- a/src/char-ucs.h +++ b/src/char-ucs.h @@ -76,117 +76,120 @@ typedef int Charset_ID; /* VISCII 1.1 */ #define LEADING_BYTE_LATIN_VISCII (CHARSET_ID_OFFSET - 3) -#define LEADING_BYTE_HIRAGANA_JISX0208 (CHARSET_ID_OFFSET - 4) -#define LEADING_BYTE_KATAKANA_JISX0208 (CHARSET_ID_OFFSET - 5) +/* MULE VISCII-LOWER (CHARSET_ID_OFFSET_96 + '1') */ +#define LEADING_BYTE_LATIN_VISCII_LOWER (CHARSET_ID_OFFSET - 4) + +/* MULE VISCII-UPPER (CHARSET_ID_OFFSET_96 + '2') */ +#define LEADING_BYTE_LATIN_VISCII_UPPER (CHARSET_ID_OFFSET - 5) + +/* Big5 Level 1 2/4 2/{(8),9,10,11} 4/0 '0' */ +#define LEADING_BYTE_CHINESE_BIG5_1 (CHARSET_ID_OFFSET - 6) + +/* Big5 Level 2 2/4 2/{(8),9,10,11} 4/0 '1' */ +#define LEADING_BYTE_CHINESE_BIG5_2 (CHARSET_ID_OFFSET - 7) + +#define LEADING_BYTE_HIRAGANA_JISX0208 (CHARSET_ID_OFFSET - 8) +#define LEADING_BYTE_KATAKANA_JISX0208 (CHARSET_ID_OFFSET - 9) #define MIN_LEADING_BYTE_PRIVATE 
MIN_LEADING_BYTE -#define MAX_LEADING_BYTE_PRIVATE (CHARSET_ID_OFFSET - 6) +#define MAX_LEADING_BYTE_PRIVATE (CHARSET_ID_OFFSET - 10) -#define CHARSET_ID_OFFSET_94 (CHARSET_ID_OFFSET - '0') +/* #define CHARSET_ID_OFFSET_94 (CHARSET_ID_OFFSET - '0') */ -#define MIN_CHARSET_ID_PRIVATE_94 (CHARSET_ID_OFFSET_94 + '0') -#define MAX_CHARSET_ID_PRIVATE_94 (CHARSET_ID_OFFSET_94 + '?') +/* #define MIN_CHARSET_ID_PRIVATE_94 (CHARSET_ID_OFFSET_94 + '0') */ +/* #define MAX_CHARSET_ID_PRIVATE_94 (CHARSET_ID_OFFSET_94 + '?') */ /* ISO 646 IRV */ -#define LEADING_BYTE_ASCII (CHARSET_ID_OFFSET_94 + 'B') +#define LEADING_BYTE_ASCII 6 /* (CHARSET_ID_OFFSET_94 + 'B') */ /* Right half of JIS X0201-1976 */ -#define LEADING_BYTE_KATAKANA_JISX0201 (CHARSET_ID_OFFSET_94 + 'I') +#define LEADING_BYTE_KATAKANA_JISX0201 13 /* (CHARSET_ID_OFFSET_94 + 'I') */ /* Left half of JIS X0201-1976 */ -#define LEADING_BYTE_LATIN_JISX0201 (CHARSET_ID_OFFSET_94 + 'J') +#define LEADING_BYTE_LATIN_JISX0201 14 /* (CHARSET_ID_OFFSET_94 + 'J') */ -#define CHARSET_ID_OFFSET_96 (CHARSET_ID_OFFSET_94 + 80) - -#define LEADING_BYTE_LATIN_VISCII_LOWER (CHARSET_ID_OFFSET_96 + '1') -#define LEADING_BYTE_LATIN_VISCII_UPPER (CHARSET_ID_OFFSET_96 + '2') +/* #define CHARSET_ID_OFFSET_96 (CHARSET_ID_OFFSET_94 + 80) */ /* Right half of ISO 8859-1 */ -#define LEADING_BYTE_LATIN_ISO8859_1 (CHARSET_ID_OFFSET_96 + 'A') +#define LEADING_BYTE_LATIN_ISO8859_1 100 /* (CHARSET_ID_OFFSET_96 + 'A') */ /* Right half of ISO 8859-2 */ -#define LEADING_BYTE_LATIN_ISO8859_2 (CHARSET_ID_OFFSET_96 + 'B') +#define LEADING_BYTE_LATIN_ISO8859_2 101 /* (CHARSET_ID_OFFSET_96 + 'B') */ /* Right half of ISO 8859-3 */ -#define LEADING_BYTE_LATIN_ISO8859_3 (CHARSET_ID_OFFSET_96 + 'C') +#define LEADING_BYTE_LATIN_ISO8859_3 109 /* (CHARSET_ID_OFFSET_96 + 'C') */ /* Right half of ISO 8859-4 */ -#define LEADING_BYTE_LATIN_ISO8859_4 (CHARSET_ID_OFFSET_96 + 'D') +#define LEADING_BYTE_LATIN_ISO8859_4 110 /* (CHARSET_ID_OFFSET_96 + 'D') */ /* Right half of 
ISO 8859-7 */ -#define LEADING_BYTE_GREEK_ISO8859_7 (CHARSET_ID_OFFSET_96 + 'F') +#define LEADING_BYTE_GREEK_ISO8859_7 126 /* (CHARSET_ID_OFFSET_96 + 'F') */ /* Right half of ISO 8859-6 */ -#define LEADING_BYTE_ARABIC_ISO8859_6 (CHARSET_ID_OFFSET_96 + 'G') +#define LEADING_BYTE_ARABIC_ISO8859_6 127 /* (CHARSET_ID_OFFSET_96 + 'G') */ /* Right half of ISO 8859-8 */ -#define LEADING_BYTE_HEBREW_ISO8859_8 (CHARSET_ID_OFFSET_96 + 'H') +#define LEADING_BYTE_HEBREW_ISO8859_8 138 /* (CHARSET_ID_OFFSET_96 + 'H') */ /* Right half of ISO 8859-5 */ -#define LEADING_BYTE_CYRILLIC_ISO8859_5 (CHARSET_ID_OFFSET_96 + 'L') +#define LEADING_BYTE_CYRILLIC_ISO8859_5 144 /* (CHARSET_ID_OFFSET_96 + 'L') */ /* Right half of ISO 8859-9 */ -#define LEADING_BYTE_LATIN_ISO8859_9 (CHARSET_ID_OFFSET_96 + 'M') +#define LEADING_BYTE_LATIN_ISO8859_9 148 /* (CHARSET_ID_OFFSET_96 + 'M') */ /* TIS620-2533 */ -#define LEADING_BYTE_THAI_TIS620 (CHARSET_ID_OFFSET_96 + 'T') - +#define LEADING_BYTE_THAI_TIS620 166 /* (CHARSET_ID_OFFSET_96 + 'T') */ -#define CHARSET_ID_OFFSET_94x94 (CHARSET_ID_OFFSET_96 + 80) -/* Big5 Level 1 */ -#define LEADING_BYTE_CHINESE_BIG5_1 ('0' + CHARSET_ID_OFFSET_94x94) +/* #define CHARSET_ID_OFFSET_94x94 (CHARSET_ID_OFFSET_96 + 80) */ -/* Big5 Level 2 */ -#define LEADING_BYTE_CHINESE_BIG5_2 ('1' + CHARSET_ID_OFFSET_94x94) -#define MIN_LEADING_BYTE_PRIVATE_2 ('0' + CHARSET_ID_OFFSET_94x94) -#define MAX_LEADING_BYTE_PRIVATE_2 ('?' + CHARSET_ID_OFFSET_94x94) +/* #define MIN_LEADING_BYTE_PRIVATE_2 ('0' + CHARSET_ID_OFFSET_94x94) */ +/* #define MAX_LEADING_BYTE_PRIVATE_2 ('?' 
+ CHARSET_ID_OFFSET_94x94) */ -/* Japanese JIS X0208-1978 */ -#define LEADING_BYTE_JAPANESE_JISX0208_1978 \ - ('@' + CHARSET_ID_OFFSET_94x94) +/* Japanese JIS X0208-1978 2/4 2/{(8),9,10,11} 4/0 (@) */ +#define LEADING_BYTE_JAPANESE_JISX0208_1978 42 -/* Chinese Hanzi GB2312-1980 */ -#define LEADING_BYTE_CHINESE_GB2312 ('A' + CHARSET_ID_OFFSET_94x94) +/* Chinese Hanzi GB2312-1980 2/4 2/{(8),9,10,11} 4/1 (A) */ +#define LEADING_BYTE_CHINESE_GB2312 58 -/* Japanese JIS X0208-1983 */ -#define LEADING_BYTE_JAPANESE_JISX0208 ('B' + CHARSET_ID_OFFSET_94x94) +/* Japanese JIS X0208-1983 2/4 2/{(8),9,10,11} 4/2 (B) */ +#define LEADING_BYTE_JAPANESE_JISX0208 87 -/* Hangul KS C5601-1987 */ -#define LEADING_BYTE_KOREAN_KSC5601 ('C' + CHARSET_ID_OFFSET_94x94) +/* Hangul KS C5601-1987 2/4 2/{8,9,10,11} 4/3 (C) */ +#define LEADING_BYTE_KOREAN_KSC5601 149 -/* Japanese JIS X0212-1990 */ -#define LEADING_BYTE_JAPANESE_JISX0212 ('D' + CHARSET_ID_OFFSET_94x94) +/* Japanese JIS X0212-1990 2/4 2/{8,9,10,11} 4/4 (D) */ +#define LEADING_BYTE_JAPANESE_JISX0212 159 -/* CCITT Extended GB */ -#define LEADING_BYTE_CHINESE_CCITT_GB ('E' + CHARSET_ID_OFFSET_94x94) +/* CCITT Extended GB 2/4 2/{8,9,10,11} 4/5 (E) */ +#define LEADING_BYTE_CHINESE_CCITT_GB 165 -/* Chinese CNS11643 Set 1 */ -#define LEADING_BYTE_CHINESE_CNS11643_1 ('G' + CHARSET_ID_OFFSET_94x94) +/* Chinese CNS11643 Set 1 2/4 2/{8,9,10,11} 4/7 (G) */ +#define LEADING_BYTE_CHINESE_CNS11643_1 171 -/* Chinese CNS11643 Set 2 */ -#define LEADING_BYTE_CHINESE_CNS11643_2 ('H' + CHARSET_ID_OFFSET_94x94) +/* Chinese CNS11643 Set 2 2/4 2/{8,9,10,11} 4/8 (H) */ +#define LEADING_BYTE_CHINESE_CNS11643_2 172 -/* Chinese CNS11643 Set 3 */ -#define LEADING_BYTE_CHINESE_CNS11643_3 ('I' + CHARSET_ID_OFFSET_94x94) +/* Chinese CNS11643 Set 3 2/4 2/{8,9,10,11} 4/9 (I) */ +#define LEADING_BYTE_CHINESE_CNS11643_3 183 -/* Chinese CNS11643 Set 4 */ -#define LEADING_BYTE_CHINESE_CNS11643_4 ('J' + CHARSET_ID_OFFSET_94x94) +/* Chinese CNS11643 Set 4 2/4 
2/{8,9,10,11} 4/10 (J) */ +#define LEADING_BYTE_CHINESE_CNS11643_4 184 -/* Chinese CNS11643 Set 5 */ -#define LEADING_BYTE_CHINESE_CNS11643_5 ('K' + CHARSET_ID_OFFSET_94x94) +/* Chinese CNS11643 Set 5 2/4 2/{8,9,10,11} 4/11 (K) */ +#define LEADING_BYTE_CHINESE_CNS11643_5 185 -/* Chinese CNS11643 Set 6 */ -#define LEADING_BYTE_CHINESE_CNS11643_6 ('L' + CHARSET_ID_OFFSET_94x94) +/* Chinese CNS11643 Set 6 2/4 2/{8,9,10,11} 4/12 (L) */ +#define LEADING_BYTE_CHINESE_CNS11643_6 186 -/* Chinese CNS11643 Set 7 */ -#define LEADING_BYTE_CHINESE_CNS11643_7 ('M' + CHARSET_ID_OFFSET_94x94) +/* Chinese CNS11643 Set 7 2/4 2/{8,9,10,11} 4/13 (M) */ +#define LEADING_BYTE_CHINESE_CNS11643_7 187 -/* DPRK Hangul KPS 9566-1997 */ -#define LEADING_BYTE_KOREAN_KPS9566 ('N' + CHARSET_ID_OFFSET_94x94) +/* DPRK Hangul KPS 9566-1997 2/4 2/{8,9,10,11} 4/14 (N) */ +#define LEADING_BYTE_KOREAN_KPS9566 202 #define NUM_LEADING_BYTES (80 * 3 - MIN_LEADING_BYTE) @@ -236,9 +239,6 @@ struct Lisp_Charset /* Byte->character mapping table */ Lisp_Object decoding_table; - /* Character->byte mapping table */ - Lisp_Object encoding_table; - /* Range of character code */ Emchar ucs_min, ucs_max; @@ -287,7 +287,6 @@ DECLARE_LRECORD (charset, Lisp_Charset); #define CHARSET_CHARS(cs) ((cs)->chars) #define CHARSET_REVERSE_DIRECTION_CHARSET(cs) ((cs)->reverse_direction_charset) #define CHARSET_DECODING_TABLE(cs) ((cs)->decoding_table) -#define CHARSET_ENCODING_TABLE(cs) ((cs)->encoding_table) #define CHARSET_UCS_MIN(cs) ((cs)->ucs_min) #define CHARSET_UCS_MAX(cs) ((cs)->ucs_max) #define CHARSET_CODE_OFFSET(cs) ((cs)->code_offset) @@ -311,7 +310,6 @@ DECLARE_LRECORD (charset, Lisp_Charset); #define XCHARSET_REVERSE_DIRECTION_CHARSET(cs) \ CHARSET_REVERSE_DIRECTION_CHARSET (XCHARSET (cs)) #define XCHARSET_DECODING_TABLE(cs) CHARSET_DECODING_TABLE(XCHARSET(cs)) -#define XCHARSET_ENCODING_TABLE(cs) CHARSET_ENCODING_TABLE(XCHARSET(cs)) #define XCHARSET_UCS_MIN(cs) CHARSET_UCS_MIN(XCHARSET(cs)) #define 
XCHARSET_UCS_MAX(cs) CHARSET_UCS_MAX(XCHARSET(cs)) #define XCHARSET_CODE_OFFSET(cs) CHARSET_CODE_OFFSET(XCHARSET(cs)) @@ -479,136 +477,153 @@ MAKE_CHAR (Lisp_Object charset, int c1, int c2) } } -unsigned char charset_get_byte1 (Lisp_Object charset, Emchar ch); -unsigned char charset_get_byte2 (Lisp_Object charset, Emchar ch); +extern Lisp_Object Vcharacter_attribute_table; + +Lisp_Object range_charset_code_point (Lisp_Object charset, Emchar ch); +Lisp_Object charset_code_point (Lisp_Object charset, Emchar ch); extern Lisp_Object Vdefault_coded_charset_priority_list; EXFUN (Ffind_charset, 1); -INLINE_HEADER void breakup_char_1 (Emchar c, Lisp_Object *charset, int *c1, int *c2); -INLINE_HEADER void -breakup_char_1 (Emchar c, Lisp_Object *charset, int *c1, int *c2) +INLINE_HEADER Lisp_Object SPLIT_CHAR (Emchar c); +INLINE_HEADER Lisp_Object +SPLIT_CHAR (Emchar c) { - if (c < MIN_CHAR_OBS_94x94) + Lisp_Object cdef = get_char_code_table (c, Vcharacter_attribute_table); + + if (!EQ (cdef, Qnil)) { Lisp_Object charsets = Vdefault_coded_charset_priority_list; + Lisp_Object field; + while (!EQ (charsets, Qnil)) { - *charset = Ffind_charset (Fcar (charsets)); - if (!EQ (*charset, Qnil) - && (*c1 = charset_get_byte1 (*charset, c)) ) + Lisp_Object charset = Ffind_charset (Fcar (charsets)); + + if (!EQ (charset, Qnil)) { - *c2 = charset_get_byte2 (*charset, c); - return; + if (!EQ (field = Fcdr (Fassq (charset, cdef)), Qnil) || + !EQ (field = range_charset_code_point (charset, c), Qnil)) + return Fcons (charset, field); } charsets = Fcdr (charsets); } - /* otherwise --- maybe for bootstrap */ + } + + /* otherwise --- maybe for bootstrap */ + if (c < MIN_CHAR_OBS_94x94) + { if (c <= MAX_CHAR_BASIC_LATIN) { - *charset = Vcharset_ascii; - *c1 = charset_get_byte1 (*charset, c); - *c2 = charset_get_byte2 (*charset, c); + return list2 (Vcharset_ascii, make_int (c)); } else if (c < 0xA0) { - *charset = Vcharset_control_1; - *c1 = charset_get_byte1 (*charset, c); - *c2 = 
charset_get_byte2 (*charset, c); + return list2 (Vcharset_control_1, make_int (c & 0x7F)); } else if (c <= 0xff) { - *charset = Vcharset_latin_iso8859_1; - *c1 = charset_get_byte1 (*charset, c); - *c2 = charset_get_byte2 (*charset, c); + return list2 (Vcharset_latin_iso8859_1, make_int (c & 0x7F)); } else if ((MIN_CHAR_GREEK <= c) && (c <= MAX_CHAR_GREEK)) { - *charset = Vcharset_greek_iso8859_7; - *c1 = c - MIN_CHAR_GREEK + 0x20; - *c2 = 0; + return list2 (Vcharset_greek_iso8859_7, + make_int (c - MIN_CHAR_GREEK + 0x20)); } else if ((MIN_CHAR_CYRILLIC <= c) && (c <= MAX_CHAR_CYRILLIC)) { - *charset = Vcharset_cyrillic_iso8859_5; - *c1 = c - MIN_CHAR_CYRILLIC + 0x20; - *c2 = 0; + return list2 (Vcharset_cyrillic_iso8859_5, + make_int (c - MIN_CHAR_CYRILLIC + 0x20)); } else if ((MIN_CHAR_HEBREW <= c) && (c <= MAX_CHAR_HEBREW)) { - *charset = Vcharset_hebrew_iso8859_8; - *c1 = c - MIN_CHAR_HEBREW + 0x20; - *c2 = 0; + return list2 (Vcharset_hebrew_iso8859_8, + make_int (c - MIN_CHAR_HEBREW + 0x20)); } else if ((MIN_CHAR_THAI <= c) && (c <= MAX_CHAR_THAI)) { - *charset = Vcharset_thai_tis620; - *c1 = c - MIN_CHAR_THAI + 0x20; - *c2 = 0; + return list2 (Vcharset_thai_tis620, + make_int (c - MIN_CHAR_THAI + 0x20)); } else if ((MIN_CHAR_HALFWIDTH_KATAKANA <= c) && (c <= MAX_CHAR_HALFWIDTH_KATAKANA)) { - *charset = Vcharset_katakana_jisx0201; - *c1 = c - MIN_CHAR_HALFWIDTH_KATAKANA + 33; - *c2 = 0; + return list2 (Vcharset_katakana_jisx0201, + make_int (c - MIN_CHAR_HALFWIDTH_KATAKANA + 33)); } else { - *charset = Vcharset_ucs_bmp; - *c1 = c >> 8; - *c2 = c & 0xff; + return list3 (Vcharset_ucs_bmp, + make_int (c >> 8), make_int (c & 0xff)); } } else if (c <= MAX_CHAR_OBS_94x94) { - *charset - = CHARSET_BY_ATTRIBUTES (CHARSET_TYPE_94X94, - ((c - MIN_CHAR_OBS_94x94) / (94 * 94)) + '@', - CHARSET_LEFT_TO_RIGHT); - *c1 = (((c - MIN_CHAR_OBS_94x94) / 94) % 94) + 33; - *c2 = ((c - MIN_CHAR_OBS_94x94) % 94) + 33; + return list3 (CHARSET_BY_ATTRIBUTES + (CHARSET_TYPE_94X94, + ((c - 
MIN_CHAR_OBS_94x94) / (94 * 94)) + '@', + CHARSET_LEFT_TO_RIGHT), + make_int ((((c - MIN_CHAR_OBS_94x94) / 94) % 94) + 33), + make_int (((c - MIN_CHAR_OBS_94x94) % 94) + 33)); } else if (c <= MAX_CHAR_94) { - *charset - = CHARSET_BY_ATTRIBUTES (CHARSET_TYPE_94, - ((c - MIN_CHAR_94) / 94) + '0', - CHARSET_LEFT_TO_RIGHT); - *c1 = ((c - MIN_CHAR_94) % 94) + 33; - *c2 = 0; + return list2 (CHARSET_BY_ATTRIBUTES (CHARSET_TYPE_94, + ((c - MIN_CHAR_94) / 94) + '0', + CHARSET_LEFT_TO_RIGHT), + make_int (((c - MIN_CHAR_94) % 94) + 33)); } else if (c <= MAX_CHAR_96) { - *charset - = CHARSET_BY_ATTRIBUTES (CHARSET_TYPE_96, - ((c - MIN_CHAR_96) / 96) + '0', - CHARSET_LEFT_TO_RIGHT); - *c1 = ((c - MIN_CHAR_96) % 96) + 32; - *c2 = 0; + return list2 (CHARSET_BY_ATTRIBUTES (CHARSET_TYPE_96, + ((c - MIN_CHAR_96) / 96) + '0', + CHARSET_LEFT_TO_RIGHT), + make_int (((c - MIN_CHAR_96) % 96) + 32)); } else if (c <= MAX_CHAR_94x94) { - *charset - = CHARSET_BY_ATTRIBUTES (CHARSET_TYPE_94X94, - ((c - MIN_CHAR_94x94) / (94 * 94)) + '0', - CHARSET_LEFT_TO_RIGHT); - *c1 = (((c - MIN_CHAR_94x94) / 94) % 94) + 33; - *c2 = ((c - MIN_CHAR_94x94) % 94) + 33; + return list3 (CHARSET_BY_ATTRIBUTES + (CHARSET_TYPE_94X94, + ((c - MIN_CHAR_94x94) / (94 * 94)) + '0', + CHARSET_LEFT_TO_RIGHT), + make_int ((((c - MIN_CHAR_94x94) / 94) % 94) + 33), + make_int (((c - MIN_CHAR_94x94) % 94) + 33)); } else if (c <= MAX_CHAR_96x96) { - *charset - = CHARSET_BY_ATTRIBUTES (CHARSET_TYPE_96X96, - ((c - MIN_CHAR_96x96) / (96 * 96)) + '0', - CHARSET_LEFT_TO_RIGHT); - *c1 = (((c - MIN_CHAR_96x96) / 96) % 96) + 32; - *c2 = ((c - MIN_CHAR_96x96) % 96) + 32; + return list3 (CHARSET_BY_ATTRIBUTES + (CHARSET_TYPE_96X96, + ((c - MIN_CHAR_96x96) / (96 * 96)) + '0', + CHARSET_LEFT_TO_RIGHT), + make_int ((((c - MIN_CHAR_96x96) / 96) % 96) + 32), + make_int (((c - MIN_CHAR_96x96) % 96) + 32)); + } + else + { + return Qnil; + } +} + +INLINE_HEADER void breakup_char_1 (Emchar c, Lisp_Object *charset, int *c1, int *c2); 
+INLINE_HEADER void +breakup_char_1 (Emchar c, Lisp_Object *charset, int *c1, int *c2) +{ + Lisp_Object ret = SPLIT_CHAR (c); + + *charset = Fcar (ret); + ret = Fcdr (ret); + if (INTP (Fcar (ret))) + { + *c1 = XINT (Fcar (ret)); + ret = Fcdr (ret); + if (INTP (Fcar (ret))) + *c2 = XINT (Fcar (ret)); + else + *c2 = 0; } else { - printf("u+%x", c); - abort(); + *c1 = *c2 = 0; } } diff --git a/src/mule-charset.c b/src/mule-charset.c index 8fd87a9..b477a3d 100644 --- a/src/mule-charset.c +++ b/src/mule-charset.c @@ -316,6 +316,49 @@ put_char_code_table (Emchar ch, Lisp_Object value, Lisp_Object table) } +Lisp_Object Vcharacter_attribute_table; + +DEFUN ("char-attribute-alist", Fchar_attribute_alist, 1, 1, 0, /* +Return the alist of attributes of CHARACTER. +*/ + (character)) +{ + return get_char_code_table (XCHAR (character), Vcharacter_attribute_table); +} + +DEFUN ("get-char-attribute", Fget_char_attribute, 2, 2, 0, /* +Return the value of CHARACTER's ATTRIBUTE. +*/ + (character, attribute)) +{ + Lisp_Object ret + = get_char_code_table (XCHAR (character), Vcharacter_attribute_table); + + if (EQ (ret, Qnil)) + return Qnil; + + return Fcdr (Fassq (attribute, ret)); +} + +DEFUN ("put-char-attribute", Fput_char_attribute, 3, 3, 0, /* +Store CHARACTER's ATTRIBUTE with VALUE. 
+*/ + (character, attribute, value)) +{ + Emchar char_code = XCHAR (character); + Lisp_Object ret + = get_char_code_table (char_code, Vcharacter_attribute_table); + Lisp_Object cell = Fassq (attribute, ret); + + if (EQ (cell, Qnil)) + ret = Fcons (Fcons (attribute, value), ret); + else + Fsetcdr (cell, value); + put_char_code_table (char_code, ret, Vcharacter_attribute_table); + return ret; +} + + Lisp_Object Vutf_2000_version; #endif @@ -706,7 +749,6 @@ mark_charset (Lisp_Object obj) mark_object (cs->ccl_program); #ifdef UTF2000 mark_object (cs->decoding_table); - mark_object (cs->encoding_table); #endif return cs->name; } @@ -755,7 +797,6 @@ static const struct lrecord_description charset_description[] = { { XD_LISP_OBJECT, offsetof (Lisp_Charset, ccl_program) }, #ifdef UTF2000 { XD_LISP_OBJECT, offsetof (Lisp_Charset, decoding_table) }, - { XD_LISP_OBJECT, offsetof (Lisp_Charset, encoding_table) }, #endif { XD_END } }; @@ -799,7 +840,6 @@ make_charset (Charset_ID id, Lisp_Object name, CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil; #ifdef UTF2000 CHARSET_DECODING_TABLE(cs) = Qnil; - CHARSET_ENCODING_TABLE(cs) = Qnil; CHARSET_UCS_MIN(cs) = ucs_min; CHARSET_UCS_MAX(cs) = ucs_max; CHARSET_CODE_OFFSET(cs) = code_offset; @@ -921,46 +961,47 @@ get_unallocated_leading_byte (int dimension) } #ifdef UTF2000 -unsigned char -charset_get_byte1 (Lisp_Object charset, Emchar ch) +Lisp_Object +range_charset_code_point (Lisp_Object charset, Emchar ch) { - Lisp_Object table; int d; - if (!EQ (table = XCHARSET_ENCODING_TABLE (charset), Qnil)) - { - Lisp_Object value = get_char_code_table (ch, table); - - if (INTP (value)) - { - Emchar code = XINT (value); - - if (code < (1 << 8)) - return code; - else if (code < (1 << 16)) - return code >> 8; - else if (code < (1 << 24)) - return code >> 16; - else - return code >> 24; - } - } if ((XCHARSET_UCS_MIN (charset) <= ch) && (ch <= XCHARSET_UCS_MAX (charset))) - return (ch - XCHARSET_UCS_MIN (charset) - + XCHARSET_CODE_OFFSET (charset)) - 
/ (XCHARSET_DIMENSION (charset) == 1 ? - 1 - : - XCHARSET_DIMENSION (charset) == 2 ? - XCHARSET_CHARS (charset) - : - XCHARSET_DIMENSION (charset) == 3 ? - XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset) - : - XCHARSET_CHARS (charset) - * XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset)) - + XCHARSET_BYTE_OFFSET (charset); + { + d = ch - XCHARSET_UCS_MIN (charset) + XCHARSET_CODE_OFFSET (charset); + + if (XCHARSET_DIMENSION (charset) == 1) + return list1 (make_int (d + XCHARSET_BYTE_OFFSET (charset))); + else if (XCHARSET_DIMENSION (charset) == 2) + return list2 (make_int (d / XCHARSET_CHARS (charset) + + XCHARSET_BYTE_OFFSET (charset)), + make_int (d % XCHARSET_CHARS (charset) + + XCHARSET_BYTE_OFFSET (charset))); + else if (XCHARSET_DIMENSION (charset) == 3) + return list3 (make_int (d / (XCHARSET_CHARS (charset) + * XCHARSET_CHARS (charset)) + + XCHARSET_BYTE_OFFSET (charset)), + make_int (d / XCHARSET_CHARS (charset) + % XCHARSET_CHARS (charset) + + XCHARSET_BYTE_OFFSET (charset)), + make_int (d % XCHARSET_CHARS (charset) + + XCHARSET_BYTE_OFFSET (charset))); + else /* if (XCHARSET_DIMENSION (charset) == 4) */ + return list4 (make_int (d / (XCHARSET_CHARS (charset) + * XCHARSET_CHARS (charset) + * XCHARSET_CHARS (charset)) + + XCHARSET_BYTE_OFFSET (charset)), + make_int (d / (XCHARSET_CHARS (charset) + * XCHARSET_CHARS (charset)) + % XCHARSET_CHARS (charset) + + XCHARSET_BYTE_OFFSET (charset)), + make_int (d / XCHARSET_CHARS (charset) + % XCHARSET_CHARS (charset) + + XCHARSET_BYTE_OFFSET (charset)), + make_int (d % XCHARSET_CHARS (charset) + + XCHARSET_BYTE_OFFSET (charset))); + } else if (XCHARSET_CODE_OFFSET (charset) == 0) { if (XCHARSET_DIMENSION (charset) == 1) @@ -970,17 +1011,17 @@ charset_get_byte1 (Lisp_Object charset, Emchar ch) if (((d = ch - (MIN_CHAR_94 + (XCHARSET_FINAL (charset) - '0') * 94)) >= 0) && (d < 94)) - return d + 33; + return list1 (make_int (d + 33)); } else if (XCHARSET_CHARS (charset) == 96) { if (((d = ch - (MIN_CHAR_96 + 
(XCHARSET_FINAL (charset) - '0') * 96)) >= 0) && (d < 96)) - return d + 32; + return list1 (make_int (d + 32)); } else - return 0; + return Qnil; } else if (XCHARSET_DIMENSION (charset) == 2) { @@ -990,7 +1031,7 @@ charset_get_byte1 (Lisp_Object charset, Emchar ch) + (XCHARSET_FINAL (charset) - '0') * 94 * 94)) >= 0) && (d < 94 * 94)) - return (d / 94) + 33; + return list2 (make_int ((d / 94) + 33), make_int (d % 94 + 33)); } else if (XCHARSET_CHARS (charset) == 96) { @@ -998,64 +1039,26 @@ charset_get_byte1 (Lisp_Object charset, Emchar ch) + (XCHARSET_FINAL (charset) - '0') * 96 * 96)) >= 0) && (d < 96 * 96)) - return (d / 96) + 32; + return list2 (make_int ((d / 96) + 32), make_int (d % 96 + 32)); } } } - return 0; + return Qnil; } -unsigned char -charset_get_byte2 (Lisp_Object charset, Emchar ch) +Lisp_Object +charset_code_point (Lisp_Object charset, Emchar ch) { - if (XCHARSET_DIMENSION (charset) == 1) - return 0; - else + Lisp_Object cdef = get_char_code_table (ch, Vcharacter_attribute_table); + + if (!EQ (cdef, Qnil)) { - Lisp_Object table; + Lisp_Object field = Fassq (charset, cdef); - if (!EQ (table = XCHARSET_ENCODING_TABLE (charset), Qnil)) - { - Lisp_Object value = get_char_code_table (ch, table); - - if (INTP (value)) - { - Emchar code = XINT (value); - - if (code < (1 << 16)) - return (unsigned char)code; - else if (code < (1 << 24)) - return (unsigned char)(code >> 16); - else - return (unsigned char)(code >> 24); - } - } - if ((XCHARSET_UCS_MIN (charset) <= ch) - && (ch <= XCHARSET_UCS_MAX (charset))) - return ((ch - XCHARSET_UCS_MIN (charset) - + XCHARSET_CODE_OFFSET (charset)) - / (XCHARSET_DIMENSION (charset) == 2 ? - 1 - : - XCHARSET_DIMENSION (charset) == 3 ?
- XCHARSET_CHARS (charset) - : - XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset))) - % XCHARSET_CHARS (charset) - + XCHARSET_BYTE_OFFSET (charset); - else if (XCHARSET_CHARS (charset) == 94) - return (MIN_CHAR_94x94 - + (XCHARSET_FINAL (charset) - '0') * 94 * 94 <= ch) - && (ch < MIN_CHAR_94x94 - + (XCHARSET_FINAL (charset) - '0' + 1) * 94 * 94) ? - ((ch - MIN_CHAR_94x94) % 94) + 33 : 0; - else /* if (XCHARSET_CHARS (charset) == 96) */ - return (MIN_CHAR_96x96 - + (XCHARSET_FINAL (charset) - '0') * 96 * 96 <= ch) - && (ch < MIN_CHAR_96x96 - + (XCHARSET_FINAL (charset) - '0' + 1) * 96 * 96) ? - ((ch - MIN_CHAR_96x96) % 96) + 32 : 0; + if (!EQ (field, Qnil)) + return Fcdr (field); } + return range_charset_code_point (charset, ch); } Lisp_Object Vdefault_coded_charset_priority_list; @@ -1207,10 +1210,7 @@ character set. Recognized properties are: Lisp_Object charset; Lisp_Object ccl_program = Qnil; Lisp_Object short_name = Qnil, long_name = Qnil; -#ifdef UTF2000 - Emchar code_offset = 0; unsigned char byte_offset = 0; -#endif CHECK_SYMBOL (name); if (!NILP (doc_string)) @@ -1326,60 +1326,7 @@ character set. 
Recognized properties are: error ("Character set already defined for this DIMENSION/CHARS/FINAL combo"); -#ifdef UTF2000 - if (dimension == 1) - { - if (chars == 94) - { - if (code_offset == 0) - id = CHARSET_ID_OFFSET_94 + final; - else - id = get_unallocated_leading_byte (dimension); - } - else if (chars == 96) - { - if (code_offset == 0) - id = CHARSET_ID_OFFSET_96 + final; - else - id = get_unallocated_leading_byte (dimension); - } - else - { - abort (); - } - } - else if (dimension == 2) - { - if (chars == 94) - { - if (code_offset == 0) - id = CHARSET_ID_OFFSET_94x94 + final; - else - id = get_unallocated_leading_byte (dimension); - } - else if (chars == 96) - { - id = get_unallocated_leading_byte (dimension); - } - else - { - abort (); - } - } - else - { - abort (); - } - if (final) - { - if (chars == 94) - byte_offset = 33; - else if (chars == 96) - byte_offset = 32; - } -#else id = get_unallocated_leading_byte (dimension); -#endif if (NILP (doc_string)) doc_string = build_string (""); @@ -1689,14 +1636,13 @@ Set mapping-table of CHARSET to TABLE. if (EQ (table, Qnil)) { CHARSET_DECODING_TABLE(cs) = table; - CHARSET_ENCODING_TABLE(cs) = Qnil; return table; } else if (VECTORP (table)) { if (XVECTOR_LENGTH (table) > CHARSET_CHARS (cs)) args_out_of_range (table, make_int (CHARSET_CHARS (cs))); - old_table = CHARSET_ENCODING_TABLE(cs); + old_table = CHARSET_DECODING_TABLE(cs); CHARSET_DECODING_TABLE(cs) = table; } else @@ -1708,19 +1654,17 @@ Set mapping-table of CHARSET to TABLE. 
switch (CHARSET_DIMENSION (cs)) { case 1: - CHARSET_ENCODING_TABLE(cs) = make_char_code_table (Qnil); for (i = 0; i < XVECTOR_LENGTH (table); i++) { Lisp_Object c = XVECTOR_DATA(table)[i]; if (CHARP (c)) - put_char_code_table (XCHAR (c), - make_int (i + CHARSET_BYTE_OFFSET (cs)), - CHARSET_ENCODING_TABLE(cs)); + Fput_char_attribute + (c, charset, + list1 (make_int (i + CHARSET_BYTE_OFFSET (cs)))); } break; case 2: - CHARSET_ENCODING_TABLE(cs) = make_char_code_table (Qnil); for (i = 0; i < XVECTOR_LENGTH (table); i++) { Lisp_Object v = XVECTOR_DATA(table)[i]; @@ -1739,17 +1683,18 @@ Set mapping-table of CHARSET to TABLE. Lisp_Object c = XVECTOR_DATA(v)[j]; if (CHARP (c)) - put_char_code_table - (XCHAR (c), - make_int (( (i + CHARSET_BYTE_OFFSET (cs)) << 8) - | (j + CHARSET_BYTE_OFFSET (cs))), - CHARSET_ENCODING_TABLE(cs)); + Fput_char_attribute (c, charset, + list2 + (make_int + (i + CHARSET_BYTE_OFFSET (cs)), + make_int + (j + CHARSET_BYTE_OFFSET (cs)))); } } else if (CHARP (v)) - put_char_code_table (XCHAR (v), - make_int (i + CHARSET_BYTE_OFFSET (cs)), - CHARSET_ENCODING_TABLE(cs)); + Fput_char_attribute (v, charset, + list1 + (make_int (i + CHARSET_BYTE_OFFSET (cs)))); } break; } @@ -1986,6 +1931,9 @@ syms_of_mule_charset (void) DEFSUBR (Fset_charset_ccl_program); DEFSUBR (Fset_charset_registry); #ifdef UTF2000 + DEFSUBR (Fchar_attribute_alist); + DEFSUBR (Fget_char_attribute); + DEFSUBR (Fput_char_attribute); DEFSUBR (Fcharset_mapping_table); DEFSUBR (Fset_charset_mapping_table); #endif @@ -2095,11 +2043,14 @@ Leading-code of private TYPE9N charset of column-width 1. #endif #ifdef UTF2000 - Vutf_2000_version = build_string("0.9 (Kyūhōji)"); + Vutf_2000_version = build_string("0.10 (Yao)"); DEFVAR_LISP ("utf-2000-version", &Vutf_2000_version /* Version number of UTF-2000. 
*/ ); + staticpro (&Vcharacter_attribute_table); + Vcharacter_attribute_table = make_char_code_table (Qnil); + Vdefault_coded_charset_priority_list = Qnil; DEFVAR_LISP ("default-coded-charset-priority-list", &Vdefault_coded_charset_priority_list /* diff --git a/src/text-coding.c b/src/text-coding.c index 4f40729..c1bd8b2 100644 --- a/src/text-coding.c +++ b/src/text-coding.c @@ -3240,16 +3240,16 @@ char_encode_shift_jis (struct encoding_stream *str, Emchar ch, } else { - Lisp_Object charset, value; + Lisp_Object charset; unsigned int c1, c2, s1, s2; - #ifdef UTF2000 - if (INTP (value = - get_char_code_table - (ch, XCHARSET_ENCODING_TABLE (Vcharset_latin_jisx0201)))) + Lisp_Object value = charset_code_point (Vcharset_latin_jisx0201, ch); + Lisp_Object ret = Fcar (value); + + if (INTP (ret)) { charset = Vcharset_latin_jisx0201; - c1 = XINT (value); + c1 = XINT (ret); c2 = 0; } else @@ -5046,15 +5046,33 @@ char_encode_iso2022 (struct encoding_stream *str, Emchar ch, reg = -1; for (i = 0; i < 4; i++) { + Lisp_Object code_point; + if ((CHARSETP (charset = str->iso2022.charset[i]) - && (byte1 = charset_get_byte1 (charset, ch))) || + && !EQ (code_point = charset_code_point (charset, ch), Qnil)) + || (CHARSETP (charset = CODING_SYSTEM_ISO2022_INITIAL_CHARSET (codesys, i)) - && (byte1 = charset_get_byte1 (charset, ch)))) + && !EQ (code_point = charset_code_point (charset, ch), Qnil))) { + Lisp_Object ret = Fcar (code_point); + + if (INTP (ret)) + { + byte1 = XINT (ret); + ret = Fcar (Fcdr (code_point)); + if (INTP (ret)) + byte2 = XINT (ret); + else + byte2 = 0; + } + else + { + byte1 = 0; + byte2 = 0; + } reg = i; - byte2 = charset_get_byte2 (charset, ch); break; } } -- 1.7.10.4