X-Git-Url: http://git.chise.org/gitweb/?a=blobdiff_plain;f=src%2Fchar-ucs.h;h=b5160338e686658b19ebc340fb63993bd1ce0956;hb=153c92fa2a3b77ce954a1e54a5dc9ec15370cfd6;hp=199e4b792775a70b6d349fdacda920dec3b1c66b;hpb=16a1b20ac010ca750c748b899b25fa0d6248c58a;p=chise%2Fxemacs-chise.git- diff --git a/src/char-ucs.h b/src/char-ucs.h index 199e4b7..b516033 100644 --- a/src/char-ucs.h +++ b/src/char-ucs.h @@ -1,5 +1,5 @@ /* Header for UCS-4 character representation. - Copyright (C) 1999,2000,2001 MORIOKA Tomohiko + Copyright (C) 1999,2000,2001,2002 MORIOKA Tomohiko This file is part of XEmacs. @@ -24,13 +24,35 @@ Boston, MA 02111-1307, USA. */ #include "chartab.h" #include "elhash.h" +EXFUN (Fmake_directory_internal, 1); + extern Lisp_Object Vchar_attribute_hash_table; #define valid_char_p(ch) 1 #define CHAR_ASCII_P(ch) ((ch) <= 0x7F) + +/************************************************************************/ +/* Exported functions */ +/************************************************************************/ + +extern Lisp_Object Vcharset_ucs; extern Lisp_Object Vcharset_latin_jisx0201; +extern Lisp_Object Vcharset_chinese_big5; +extern Lisp_Object Vcharset_chinese_big5_1; +extern Lisp_Object Vcharset_chinese_big5_2; +extern Lisp_Object Vcharset_japanese_jisx0208; +extern Lisp_Object Vcharset_japanese_jisx0208_1990; +extern Lisp_Object Vcharset_japanese_jisx0212; + +EXFUN (Fget_charset, 1); + +extern Lisp_Object Qucs; + +Lisp_Object put_char_ccs_code_point (Lisp_Object character, + Lisp_Object ccs, Lisp_Object value); +Lisp_Object remove_char_ccs (Lisp_Object character, Lisp_Object ccs); /************************************************************************/ @@ -40,102 +62,96 @@ extern Lisp_Object Vcharset_latin_jisx0201; typedef short Charset_ID; #define MIN_LEADING_BYTE -0x200 -#define CHARSET_ID_OFFSET 0x00 /* ISO/IEC 10646 */ -#define LEADING_BYTE_UCS (CHARSET_ID_OFFSET - 1) +#define LEADING_BYTE_UCS (MIN_LEADING_BYTE + 1) /* represent normal 80-9F */ -#define LEADING_BYTE_CONTROL_1 (CHARSET_ID_OFFSET - 2) +#define LEADING_BYTE_CONTROL_1 (MIN_LEADING_BYTE + 2) /* ISO/IEC 10646 BMP */ -#define LEADING_BYTE_UCS_BMP (CHARSET_ID_OFFSET - 3) +#define LEADING_BYTE_UCS_BMP (MIN_LEADING_BYTE + 3) /* ISO/IEC 10646 SMP */ -#define LEADING_BYTE_UCS_SMP (CHARSET_ID_OFFSET - 4) +#define LEADING_BYTE_UCS_SMP (MIN_LEADING_BYTE + 4) /* ISO/IEC 10646 SIP */ -#define LEADING_BYTE_UCS_SIP (CHARSET_ID_OFFSET - 5) +#define LEADING_BYTE_UCS_SIP (MIN_LEADING_BYTE + 5) /* ISO/IEC 10646 for CNS */ -#define LEADING_BYTE_UCS_CNS (CHARSET_ID_OFFSET - 6) +#define LEADING_BYTE_UCS_CNS (MIN_LEADING_BYTE + 6) /* ISO/IEC 10646 for JIS */ -#define LEADING_BYTE_UCS_JIS (CHARSET_ID_OFFSET - 7) +#define LEADING_BYTE_UCS_JIS (MIN_LEADING_BYTE + 7) /* ISO/IEC 10646 for KS */ -#define LEADING_BYTE_UCS_KS (CHARSET_ID_OFFSET - 8) +#define LEADING_BYTE_UCS_KS (MIN_LEADING_BYTE + 8) /* ISO/IEC 10646 for Big5 */ -#define LEADING_BYTE_UCS_BIG5 (CHARSET_ID_OFFSET - 9) +#define LEADING_BYTE_UCS_BIG5 (MIN_LEADING_BYTE + 9) /* Japanese JIS X0208-1990 2/4 2/{(8),9,10,11} 4/2 (B) */ -#define LEADING_BYTE_JAPANESE_JISX0208_1990 (CHARSET_ID_OFFSET - 10) +#define LEADING_BYTE_JAPANESE_JISX0208_1990 (MIN_LEADING_BYTE + 10) /* Chinese GB 12345-1990 */ -#define LEADING_BYTE_CHINESE_GB12345 (CHARSET_ID_OFFSET - 11) +#define LEADING_BYTE_CHINESE_GB12345 (MIN_LEADING_BYTE + 11) -#define LEADING_BYTE_CHINESE_BIG5 (CHARSET_ID_OFFSET - 20) +#define LEADING_BYTE_CHINESE_BIG5 (MIN_LEADING_BYTE + 20) /* Big5 Level 1 2/4 2/{(8),9,10,11} 4/0 '0' */ -#define LEADING_BYTE_CHINESE_BIG5_1 (CHARSET_ID_OFFSET - 21) +#define LEADING_BYTE_CHINESE_BIG5_1 (MIN_LEADING_BYTE + 21) /* Big5 Level 2 2/4 2/{(8),9,10,11} 4/0 '1' */ -#define LEADING_BYTE_CHINESE_BIG5_2 (CHARSET_ID_OFFSET - 22) +#define LEADING_BYTE_CHINESE_BIG5_2 (MIN_LEADING_BYTE + 22) /* VISCII 1.1 */ -#define LEADING_BYTE_LATIN_VISCII (CHARSET_ID_OFFSET - 24) +#define LEADING_BYTE_LATIN_VISCII (MIN_LEADING_BYTE + 24) /* MULE VISCII-LOWER (CHARSET_ID_OFFSET_96 + '1') */ -#define LEADING_BYTE_LATIN_VISCII_LOWER (CHARSET_ID_OFFSET - 25) +#define LEADING_BYTE_LATIN_VISCII_LOWER (MIN_LEADING_BYTE + 25) /* MULE VISCII-UPPER (CHARSET_ID_OFFSET_96 + '2') */ -#define LEADING_BYTE_LATIN_VISCII_UPPER (CHARSET_ID_OFFSET - 26) - -#define LEADING_BYTE_ETHIOPIC_UCS (CHARSET_ID_OFFSET - 27) - -#define LEADING_BYTE_DAIKANWA_0 (CHARSET_ID_OFFSET - 28) -#define LEADING_BYTE_DAIKANWA_1 (CHARSET_ID_OFFSET - 29) -#define LEADING_BYTE_DAIKANWA_2 (CHARSET_ID_OFFSET - 30) -#define LEADING_BYTE_DAIKANWA_3 (CHARSET_ID_OFFSET - 31) - -#define LEADING_BYTE_GT (CHARSET_ID_OFFSET - 40) -#define LEADING_BYTE_GT_PJ_1 (CHARSET_ID_OFFSET - 41) -#define LEADING_BYTE_GT_PJ_2 (CHARSET_ID_OFFSET - 42) -#define LEADING_BYTE_GT_PJ_3 (CHARSET_ID_OFFSET - 43) -#define LEADING_BYTE_GT_PJ_4 (CHARSET_ID_OFFSET - 44) -#define LEADING_BYTE_GT_PJ_5 (CHARSET_ID_OFFSET - 45) -#define LEADING_BYTE_GT_PJ_6 (CHARSET_ID_OFFSET - 46) -#define LEADING_BYTE_GT_PJ_7 (CHARSET_ID_OFFSET - 47) -#define LEADING_BYTE_GT_PJ_8 (CHARSET_ID_OFFSET - 48) -#define LEADING_BYTE_GT_PJ_9 (CHARSET_ID_OFFSET - 49) -#define LEADING_BYTE_GT_PJ_10 (CHARSET_ID_OFFSET - 50) -#define LEADING_BYTE_GT_PJ_11 (CHARSET_ID_OFFSET - 51) - -#define LEADING_BYTE_CHINA3_JEF (CHARSET_ID_OFFSET - 82) -#define LEADING_BYTE_CBETA (CHARSET_ID_OFFSET - 83) -#define LEADING_BYTE_CHINESE_BIG5_CDP (CHARSET_ID_OFFSET - 84) -#define LEADING_BYTE_HANZIKU_1 (CHARSET_ID_OFFSET - 85) -#define LEADING_BYTE_HANZIKU_2 (CHARSET_ID_OFFSET - 86) -#define LEADING_BYTE_HANZIKU_3 (CHARSET_ID_OFFSET - 87) -#define LEADING_BYTE_HANZIKU_4 (CHARSET_ID_OFFSET - 88) -#define LEADING_BYTE_HANZIKU_5 (CHARSET_ID_OFFSET - 89) -#define LEADING_BYTE_HANZIKU_6 (CHARSET_ID_OFFSET - 90) -#define LEADING_BYTE_HANZIKU_7 (CHARSET_ID_OFFSET - 91) -#define LEADING_BYTE_HANZIKU_8 (CHARSET_ID_OFFSET - 92) -#define LEADING_BYTE_HANZIKU_9 (CHARSET_ID_OFFSET - 93) -#define LEADING_BYTE_HANZIKU_10 (CHARSET_ID_OFFSET - 94) -#define LEADING_BYTE_HANZIKU_11 (CHARSET_ID_OFFSET - 95) -#define LEADING_BYTE_HANZIKU_12 (CHARSET_ID_OFFSET - 96) - -#define MIN_LEADING_BYTE_PRIVATE MIN_LEADING_BYTE -#define MAX_LEADING_BYTE_PRIVATE (CHARSET_ID_OFFSET - 97) - - -/* #define CHARSET_ID_OFFSET_94 (CHARSET_ID_OFFSET - '0') */ - -/* #define MIN_CHARSET_ID_PRIVATE_94 (CHARSET_ID_OFFSET_94 + '0') */ -/* #define MAX_CHARSET_ID_PRIVATE_94 (CHARSET_ID_OFFSET_94 + '?') */ +#define LEADING_BYTE_LATIN_VISCII_UPPER (MIN_LEADING_BYTE + 26) + +#define LEADING_BYTE_ETHIOPIC_UCS (MIN_LEADING_BYTE + 27) + +#define LEADING_BYTE_DAIKANWA_0 (MIN_LEADING_BYTE + 28) +#define LEADING_BYTE_DAIKANWA_1 (MIN_LEADING_BYTE + 29) +#define LEADING_BYTE_DAIKANWA_2 (MIN_LEADING_BYTE + 30) +#define LEADING_BYTE_DAIKANWA_3 (MIN_LEADING_BYTE + 31) + +#define LEADING_BYTE_GT (MIN_LEADING_BYTE + 40) +#define LEADING_BYTE_GT_PJ_1 (MIN_LEADING_BYTE + 41) +#define LEADING_BYTE_GT_PJ_2 (MIN_LEADING_BYTE + 42) +#define LEADING_BYTE_GT_PJ_3 (MIN_LEADING_BYTE + 43) +#define LEADING_BYTE_GT_PJ_4 (MIN_LEADING_BYTE + 44) +#define LEADING_BYTE_GT_PJ_5 (MIN_LEADING_BYTE + 45) +#define LEADING_BYTE_GT_PJ_6 (MIN_LEADING_BYTE + 46) +#define LEADING_BYTE_GT_PJ_7 (MIN_LEADING_BYTE + 47) +#define LEADING_BYTE_GT_PJ_8 (MIN_LEADING_BYTE + 48) +#define LEADING_BYTE_GT_PJ_9 (MIN_LEADING_BYTE + 49) +#define LEADING_BYTE_GT_PJ_10 (MIN_LEADING_BYTE + 50) +#define LEADING_BYTE_GT_PJ_11 (MIN_LEADING_BYTE + 51) + +#define LEADING_BYTE_CHINA3_JEF (MIN_LEADING_BYTE + 82) +#define LEADING_BYTE_CBETA (MIN_LEADING_BYTE + 83) +/* #define LEADING_BYTE_CHINESE_BIG5_CDP (MIN_LEADING_BYTE + 84) */ +#define LEADING_BYTE_HANZIKU_1 (MIN_LEADING_BYTE + 85) +#define LEADING_BYTE_HANZIKU_2 (MIN_LEADING_BYTE + 86) +#define LEADING_BYTE_HANZIKU_3 (MIN_LEADING_BYTE + 87) +#define LEADING_BYTE_HANZIKU_4 (MIN_LEADING_BYTE + 88) +#define LEADING_BYTE_HANZIKU_5 (MIN_LEADING_BYTE + 89) +#define LEADING_BYTE_HANZIKU_6 (MIN_LEADING_BYTE + 90) +#define LEADING_BYTE_HANZIKU_7 (MIN_LEADING_BYTE + 91) +#define LEADING_BYTE_HANZIKU_8 (MIN_LEADING_BYTE + 92) +#define LEADING_BYTE_HANZIKU_9 (MIN_LEADING_BYTE + 93) +#define LEADING_BYTE_HANZIKU_10 (MIN_LEADING_BYTE + 94) +#define LEADING_BYTE_HANZIKU_11 (MIN_LEADING_BYTE + 95) +#define LEADING_BYTE_HANZIKU_12 (MIN_LEADING_BYTE + 96) + +#define MIN_LEADING_BYTE_PRIVATE (MIN_LEADING_BYTE + 97) +#define MAX_LEADING_BYTE_PRIVATE -1 + /* ISO 646 IRV */ #define LEADING_BYTE_ASCII 6 /* (CHARSET_ID_OFFSET_94 + 'B') */ @@ -327,6 +343,18 @@ DECLARE_LRECORD (charset, Lisp_Charset); #define CHARSET_MOTHER(cs) ((cs)->mother) #define CHARSET_CONVERSION(cs) ((cs)->conversion) +INLINE_HEADER int CHARSET_BYTE_SIZE (Lisp_Charset* cs); +INLINE_HEADER int +CHARSET_BYTE_SIZE (Lisp_Charset* cs) +{ + /* ad-hoc method for `ascii' */ + if ((CHARSET_CHARS (cs) == 94) && + (CHARSET_BYTE_OFFSET (cs) != 33)) + return 128 - CHARSET_BYTE_OFFSET (cs); + else + return CHARSET_CHARS (cs); +} + INLINE_HEADER Lisp_Object CHARSET_ENCODING_TABLE (Lisp_Charset* cs); INLINE_HEADER Lisp_Object CHARSET_ENCODING_TABLE (Lisp_Charset* cs) @@ -353,6 +381,7 @@ CHARSET_ENCODING_TABLE (Lisp_Charset* cs) #define XCHARSET_CCL_PROGRAM(cs) CHARSET_CCL_PROGRAM (XCHARSET (cs)) #define XCHARSET_DIMENSION(cs) CHARSET_DIMENSION (XCHARSET (cs)) #define XCHARSET_CHARS(cs) CHARSET_CHARS (XCHARSET (cs)) +#define XCHARSET_BYTE_SIZE(cs) CHARSET_BYTE_SIZE (XCHARSET (cs)) #define XCHARSET_REVERSE_DIRECTION_CHARSET(cs) \ CHARSET_REVERSE_DIRECTION_CHARSET (XCHARSET (cs)) #define XCHARSET_DECODING_TABLE(cs) CHARSET_DECODING_TABLE(XCHARSET(cs)) @@ -497,45 +526,160 @@ CHARSET_BY_ATTRIBUTES (int chars, int dimension, int final, int dir) Emchar decode_builtin_char (Lisp_Object charset, int code_point); -extern Lisp_Object Vcharset_chinese_big5; -extern Lisp_Object Vcharset_chinese_big5_1; -extern Lisp_Object Vcharset_chinese_big5_2; +INLINE_HEADER Lisp_Object +get_ccs_octet_table (Lisp_Object table, Lisp_Object ccs, int code); +INLINE_HEADER Lisp_Object +get_ccs_octet_table (Lisp_Object table, Lisp_Object ccs, int code) +{ + int byte_offset = XCHARSET_BYTE_OFFSET (ccs); -INLINE_HEADER Emchar -DECODE_DEFINED_CHAR (Lisp_Object charset, int code_point); -INLINE_HEADER Emchar -DECODE_DEFINED_CHAR (Lisp_Object charset, int code_point) + if (VECTORP (table)) + { + int idx = code - byte_offset; + + if (idx < XVECTOR_LENGTH(table)) + return XVECTOR_DATA(table)[idx]; + else + return Qunbound; + } + else + return table; +} + +INLINE_HEADER Lisp_Object +put_ccs_octet_table (Lisp_Object table, Lisp_Object ccs, int code, + Lisp_Object value); +INLINE_HEADER Lisp_Object +put_ccs_octet_table (Lisp_Object table, Lisp_Object ccs, int code, + Lisp_Object value) { - int dim = XCHARSET_DIMENSION (charset); - Lisp_Object decoding_table = XCHARSET_DECODING_TABLE (charset); - int idx; - Lisp_Object ch; + int byte_offset = XCHARSET_BYTE_OFFSET (ccs); + int ccs_len = XCHARSET_BYTE_SIZE (ccs); + + if (VECTORP (table)) + { + XVECTOR_DATA(table)[code - byte_offset] = value; + return table; + } + else if (EQ (table, value)) + return table; + else + { + table = make_vector (ccs_len, table); + XVECTOR_DATA(table)[code - byte_offset] = value; + return table; + } +} + +INLINE_HEADER void +decoding_table_put_char (Lisp_Object ccs, + int code_point, Lisp_Object character); +INLINE_HEADER void +decoding_table_put_char (Lisp_Object ccs, + int code_point, Lisp_Object character) +{ +#if 1 + Lisp_Object table1 = XCHARSET_DECODING_TABLE (ccs); + int dim = XCHARSET_DIMENSION (ccs); + + if (dim == 1) + XCHARSET_DECODING_TABLE (ccs) + = put_ccs_octet_table (table1, ccs, code_point, character); + else if (dim == 2) + { + Lisp_Object table2 + = get_ccs_octet_table (table1, ccs, (unsigned char)(code_point >> 8)); + + table2 = put_ccs_octet_table (table2, ccs, + (unsigned char)code_point, character); + XCHARSET_DECODING_TABLE (ccs) + = put_ccs_octet_table (table1, ccs, + (unsigned char)(code_point >> 8), table2); + } + else if (dim == 3) + { + Lisp_Object table2 + = get_ccs_octet_table (table1, ccs, (unsigned char)(code_point >> 16)); + Lisp_Object table3 + = get_ccs_octet_table (table2, ccs, (unsigned char)(code_point >> 8)); + + table3 = put_ccs_octet_table (table3, ccs, + (unsigned char)code_point, character); + table2 = put_ccs_octet_table (table2, ccs, + (unsigned char)(code_point >> 8), table3); + XCHARSET_DECODING_TABLE (ccs) + = put_ccs_octet_table (table1, ccs, + (unsigned char)(code_point >> 16), table2); + } + else /* if (dim == 4) */ + { + Lisp_Object table2 + = get_ccs_octet_table (table1, ccs, (unsigned char)(code_point >> 24)); + Lisp_Object table3 + = get_ccs_octet_table (table2, ccs, (unsigned char)(code_point >> 16)); + Lisp_Object table4 + = get_ccs_octet_table (table3, ccs, (unsigned char)(code_point >> 8)); + + table4 = put_ccs_octet_table (table4, ccs, + (unsigned char)code_point, character); + table3 = put_ccs_octet_table (table3, ccs, + (unsigned char)(code_point >> 8), table4); + table2 = put_ccs_octet_table (table2, ccs, + (unsigned char)(code_point >> 16), table3); + XCHARSET_DECODING_TABLE (ccs) + = put_ccs_octet_table (table1, ccs, + (unsigned char)(code_point >> 24), table2); + } +#else + Lisp_Object v = XCHARSET_DECODING_TABLE (ccs); + int dim = XCHARSET_DIMENSION (ccs); + int byte_offset = XCHARSET_BYTE_OFFSET (ccs); + int i = -1; + Lisp_Object nv; + int ccs_len = XVECTOR_LENGTH (v); while (dim > 0) { dim--; - if ( VECTORP (decoding_table) - && ( 0 <= (idx = ((code_point >> (dim * 8)) - & 255) - XCHARSET_BYTE_OFFSET (charset)) ) - && ( idx < XVECTOR_LENGTH (decoding_table) ) - && !NILP (ch = XVECTOR_DATA(decoding_table)[idx]) ) + i = ((code_point >> (8 * dim)) & 255) - byte_offset; + nv = XVECTOR_DATA(v)[i]; + if (dim > 0) { - if (CHARP (ch)) - return XCHAR (ch); - else - decoding_table = ch; + if (!VECTORP (nv)) + { + if (EQ (nv, character)) + return; + else + nv = (XVECTOR_DATA(v)[i] = make_vector (ccs_len, Qnil)); + } + v = nv; } else break; } - return -1; + XVECTOR_DATA(v)[i] = character; +#endif } +INLINE_HEADER void +decoding_table_remove_char (Lisp_Object ccs, int code_point); +INLINE_HEADER void +decoding_table_remove_char (Lisp_Object ccs, int code_point) +{ + decoding_table_put_char (ccs, code_point, Qunbound); +} + +#ifdef HAVE_DATABASE +Emchar load_char_decoding_entry_maybe (Lisp_Object ccs, int code_point); +#endif + +Emchar decode_defined_char (Lisp_Object charset, int code_point); + INLINE_HEADER Emchar DECODE_CHAR (Lisp_Object charset, int code_point); INLINE_HEADER Emchar DECODE_CHAR (Lisp_Object charset, int code_point) { - Emchar char_id = DECODE_DEFINED_CHAR (charset, code_point); + Emchar char_id = decode_defined_char (charset, code_point); if (char_id >= 0) return char_id; @@ -675,21 +819,4 @@ CHAR_TO_CHARC (Emchar ch) return cc; } - -/************************************************************************/ -/* Exported functions */ -/************************************************************************/ - -EXFUN (Fget_charset, 1); - -extern Lisp_Object Qucs; - -extern Lisp_Object Vcharset_japanese_jisx0208; -extern Lisp_Object Vcharset_japanese_jisx0208_1990; -extern Lisp_Object Vcharset_japanese_jisx0212; - -Lisp_Object put_char_ccs_code_point (Lisp_Object character, - Lisp_Object ccs, Lisp_Object value); -Lisp_Object remove_char_ccs (Lisp_Object character, Lisp_Object ccs); - #endif /* INCLUDED_char_ucs_h_ */