X-Git-Url: http://git.chise.org/gitweb/?a=blobdiff_plain;f=src%2Fchar-ucs.h;h=7ea200d4bc050910f87ab4ed6a5f9c0a30aeaaaf;hb=25e5d28d57dbe61cb1a5366d8e4a48c0196b3b74;hp=9e4a14947bdb768d5d2b9e27000215344c508590;hpb=072a23651971368c35e5f05b78c57e77f7feb908;p=chise%2Fxemacs-chise.git diff --git a/src/char-ucs.h b/src/char-ucs.h index 9e4a149..7ea200d 100644 --- a/src/char-ucs.h +++ b/src/char-ucs.h @@ -1,5 +1,5 @@ /* Header for UCS-4 character representation. - Copyright (C) 1999,2000,2001 MORIOKA Tomohiko + Copyright (C) 1999,2000,2001,2002,2003 MORIOKA Tomohiko This file is part of XEmacs. @@ -30,7 +30,27 @@ extern Lisp_Object Vchar_attribute_hash_table; #define CHAR_ASCII_P(ch) ((ch) <= 0x7F) + +/************************************************************************/ +/* Exported functions */ +/************************************************************************/ + +extern Lisp_Object Vcharset_ucs; extern Lisp_Object Vcharset_latin_jisx0201; +extern Lisp_Object Vcharset_chinese_big5; +extern Lisp_Object Vcharset_chinese_big5_1; +extern Lisp_Object Vcharset_chinese_big5_2; +extern Lisp_Object Vcharset_japanese_jisx0208; +extern Lisp_Object Vcharset_japanese_jisx0208_1990; +extern Lisp_Object Vcharset_japanese_jisx0212; + +EXFUN (Fget_charset, 1); + +extern Lisp_Object Qucs; + +Lisp_Object put_char_ccs_code_point (Lisp_Object character, + Lisp_Object ccs, Lisp_Object value); +Lisp_Object remove_char_ccs (Lisp_Object character, Lisp_Object ccs); /************************************************************************/ @@ -40,102 +60,56 @@ extern Lisp_Object Vcharset_latin_jisx0201; typedef short Charset_ID; #define MIN_LEADING_BYTE -0x200 -#define CHARSET_ID_OFFSET 0x00 /* ISO/IEC 10646 */ -#define LEADING_BYTE_UCS (CHARSET_ID_OFFSET - 1) +#define LEADING_BYTE_UCS (MIN_LEADING_BYTE + 1) /* represent normal 80-9F */ -#define LEADING_BYTE_CONTROL_1 (CHARSET_ID_OFFSET - 2) +#define LEADING_BYTE_CONTROL_1 (MIN_LEADING_BYTE + 2) /* ISO/IEC 10646 BMP */ -#define LEADING_BYTE_UCS_BMP (CHARSET_ID_OFFSET - 3) +#define LEADING_BYTE_UCS_BMP (MIN_LEADING_BYTE + 3) /* ISO/IEC 10646 SMP */ -#define LEADING_BYTE_UCS_SMP (CHARSET_ID_OFFSET - 4) +#define LEADING_BYTE_UCS_SMP (MIN_LEADING_BYTE + 4) /* ISO/IEC 10646 SIP */ -#define LEADING_BYTE_UCS_SIP (CHARSET_ID_OFFSET - 5) - -/* ISO/IEC 10646 for CNS */ -#define LEADING_BYTE_UCS_CNS (CHARSET_ID_OFFSET - 6) - -/* ISO/IEC 10646 for JIS */ -#define LEADING_BYTE_UCS_JIS (CHARSET_ID_OFFSET - 7) +#define LEADING_BYTE_UCS_SIP (MIN_LEADING_BYTE + 5) -/* ISO/IEC 10646 for KS */ -#define LEADING_BYTE_UCS_KS (CHARSET_ID_OFFSET - 8) +/* ISO/IEC 10646 for GB */ +#define LEADING_BYTE_UCS_GB (MIN_LEADING_BYTE + 6) -/* ISO/IEC 10646 for Big5 */ -#define LEADING_BYTE_UCS_BIG5 (CHARSET_ID_OFFSET - 9) +/* Japanese JIS X0208 Common 2/4 2/{(8),9,10,11} 4/2 (B) */ +#define LEADING_BYTE_JIS_X0208 (MIN_LEADING_BYTE + 11) /* Japanese JIS X0208-1990 2/4 2/{(8),9,10,11} 4/2 (B) */ -#define LEADING_BYTE_JAPANESE_JISX0208_1990 (CHARSET_ID_OFFSET - 10) +#define LEADING_BYTE_JAPANESE_JISX0208_1990 (MIN_LEADING_BYTE + 12) /* Chinese GB 12345-1990 */ -#define LEADING_BYTE_CHINESE_GB12345 (CHARSET_ID_OFFSET - 11) +#define LEADING_BYTE_CHINESE_GB12345 (MIN_LEADING_BYTE + 13) -#define LEADING_BYTE_CHINESE_BIG5 (CHARSET_ID_OFFSET - 20) +#define LEADING_BYTE_CHINESE_BIG5 (MIN_LEADING_BYTE + 20) /* Big5 Level 1 2/4 2/{(8),9,10,11} 4/0 '0' */ -#define LEADING_BYTE_CHINESE_BIG5_1 (CHARSET_ID_OFFSET - 21) +#define LEADING_BYTE_CHINESE_BIG5_1 (MIN_LEADING_BYTE + 21) /* Big5 Level 2 2/4 2/{(8),9,10,11} 4/0 '1' */ -#define LEADING_BYTE_CHINESE_BIG5_2 (CHARSET_ID_OFFSET - 22) +#define LEADING_BYTE_CHINESE_BIG5_2 (MIN_LEADING_BYTE + 22) /* VISCII 1.1 */ -#define LEADING_BYTE_LATIN_VISCII (CHARSET_ID_OFFSET - 24) +#define LEADING_BYTE_LATIN_VISCII (MIN_LEADING_BYTE + 24) /* MULE VISCII-LOWER (CHARSET_ID_OFFSET_96 + '1') */ -#define LEADING_BYTE_LATIN_VISCII_LOWER (CHARSET_ID_OFFSET - 25) +#define LEADING_BYTE_LATIN_VISCII_LOWER (MIN_LEADING_BYTE + 25) /* MULE VISCII-UPPER (CHARSET_ID_OFFSET_96 + '2') */ -#define LEADING_BYTE_LATIN_VISCII_UPPER (CHARSET_ID_OFFSET - 26) - -#define LEADING_BYTE_ETHIOPIC_UCS (CHARSET_ID_OFFSET - 27) - -#define LEADING_BYTE_DAIKANWA_0 (CHARSET_ID_OFFSET - 28) -#define LEADING_BYTE_DAIKANWA_1 (CHARSET_ID_OFFSET - 29) -#define LEADING_BYTE_DAIKANWA_2 (CHARSET_ID_OFFSET - 30) -#define LEADING_BYTE_DAIKANWA_3 (CHARSET_ID_OFFSET - 31) - -#define LEADING_BYTE_GT (CHARSET_ID_OFFSET - 40) -#define LEADING_BYTE_GT_PJ_1 (CHARSET_ID_OFFSET - 41) -#define LEADING_BYTE_GT_PJ_2 (CHARSET_ID_OFFSET - 42) -#define LEADING_BYTE_GT_PJ_3 (CHARSET_ID_OFFSET - 43) -#define LEADING_BYTE_GT_PJ_4 (CHARSET_ID_OFFSET - 44) -#define LEADING_BYTE_GT_PJ_5 (CHARSET_ID_OFFSET - 45) -#define LEADING_BYTE_GT_PJ_6 (CHARSET_ID_OFFSET - 46) -#define LEADING_BYTE_GT_PJ_7 (CHARSET_ID_OFFSET - 47) -#define LEADING_BYTE_GT_PJ_8 (CHARSET_ID_OFFSET - 48) -#define LEADING_BYTE_GT_PJ_9 (CHARSET_ID_OFFSET - 49) -#define LEADING_BYTE_GT_PJ_10 (CHARSET_ID_OFFSET - 50) -#define LEADING_BYTE_GT_PJ_11 (CHARSET_ID_OFFSET - 51) - -#define LEADING_BYTE_CHINA3_JEF (CHARSET_ID_OFFSET - 82) -#define LEADING_BYTE_CBETA (CHARSET_ID_OFFSET - 83) -#define LEADING_BYTE_CHINESE_BIG5_CDP (CHARSET_ID_OFFSET - 84) -#define LEADING_BYTE_HANZIKU_1 (CHARSET_ID_OFFSET - 85) -#define LEADING_BYTE_HANZIKU_2 (CHARSET_ID_OFFSET - 86) -#define LEADING_BYTE_HANZIKU_3 (CHARSET_ID_OFFSET - 87) -#define LEADING_BYTE_HANZIKU_4 (CHARSET_ID_OFFSET - 88) -#define LEADING_BYTE_HANZIKU_5 (CHARSET_ID_OFFSET - 89) -#define LEADING_BYTE_HANZIKU_6 (CHARSET_ID_OFFSET - 90) -#define LEADING_BYTE_HANZIKU_7 (CHARSET_ID_OFFSET - 91) -#define LEADING_BYTE_HANZIKU_8 (CHARSET_ID_OFFSET - 92) -#define LEADING_BYTE_HANZIKU_9 (CHARSET_ID_OFFSET - 93) -#define LEADING_BYTE_HANZIKU_10 (CHARSET_ID_OFFSET - 94) -#define LEADING_BYTE_HANZIKU_11 (CHARSET_ID_OFFSET - 95) -#define LEADING_BYTE_HANZIKU_12 (CHARSET_ID_OFFSET - 96) - -#define MIN_LEADING_BYTE_PRIVATE MIN_LEADING_BYTE -#define MAX_LEADING_BYTE_PRIVATE (CHARSET_ID_OFFSET - 97) - - -/* #define CHARSET_ID_OFFSET_94 (CHARSET_ID_OFFSET - '0') */ - -/* #define MIN_CHARSET_ID_PRIVATE_94 (CHARSET_ID_OFFSET_94 + '0') */ -/* #define MAX_CHARSET_ID_PRIVATE_94 (CHARSET_ID_OFFSET_94 + '?') */ +#define LEADING_BYTE_LATIN_VISCII_UPPER (MIN_LEADING_BYTE + 26) + +#define LEADING_BYTE_ETHIOPIC_UCS (MIN_LEADING_BYTE + 27) + +#define MIN_LEADING_BYTE_PRIVATE (MIN_LEADING_BYTE + 97) +#define MAX_LEADING_BYTE_PRIVATE -1 + /* ISO 646 IRV */ #define LEADING_BYTE_ASCII 6 /* (CHARSET_ID_OFFSET_94 + 'B') */ @@ -296,7 +270,6 @@ DECLARE_LRECORD (charset, Lisp_Charset); #define XCHARSET(x) XRECORD (x, charset, Lisp_Charset) #define XSETCHARSET(x, p) XSETRECORD (x, p, charset) #define CHARSETP(x) RECORDP (x, charset) -#define GC_CHARSETP(x) GC_RECORDP (x, charset) #define CHECK_CHARSET(x) CHECK_RECORD (x, charset) #define CONCHECK_CHARSET(x) CONCHECK_RECORD (x, charset) @@ -327,6 +300,18 @@ DECLARE_LRECORD (charset, Lisp_Charset); #define CHARSET_MOTHER(cs) ((cs)->mother) #define CHARSET_CONVERSION(cs) ((cs)->conversion) +INLINE_HEADER int CHARSET_BYTE_SIZE (Lisp_Charset* cs); +INLINE_HEADER int +CHARSET_BYTE_SIZE (Lisp_Charset* cs) +{ + /* ad-hoc method for `ascii' */ + if ((CHARSET_CHARS (cs) == 94) && + (CHARSET_BYTE_OFFSET (cs) != 33)) + return 128 - CHARSET_BYTE_OFFSET (cs); + else + return CHARSET_CHARS (cs); +} + INLINE_HEADER Lisp_Object CHARSET_ENCODING_TABLE (Lisp_Charset* cs); INLINE_HEADER Lisp_Object CHARSET_ENCODING_TABLE (Lisp_Charset* cs) @@ -337,7 +322,17 @@ CHARSET_ENCODING_TABLE (Lisp_Charset* cs) } #define CONVERSION_IDENTICAL 0 -#define CONVERSION_94x60 1 +#define CONVERSION_94 1 +#define CONVERSION_96 2 +#define CONVERSION_94x60 3 +#define CONVERSION_94x94 4 +#define CONVERSION_96x96 5 +#define CONVERSION_94x94x60 6 +#define CONVERSION_94x94x94 7 +#define CONVERSION_96x96x96 8 +#define CONVERSION_94x94x94x60 9 +#define CONVERSION_94x94x94x94 10 +#define CONVERSION_96x96x96x96 11 #define XCHARSET_ID(cs) CHARSET_ID (XCHARSET (cs)) #define XCHARSET_NAME(cs) CHARSET_NAME (XCHARSET (cs)) @@ -352,6 +347,7 @@ CHARSET_ENCODING_TABLE (Lisp_Charset* cs) #define XCHARSET_CCL_PROGRAM(cs) CHARSET_CCL_PROGRAM (XCHARSET (cs)) #define XCHARSET_DIMENSION(cs) CHARSET_DIMENSION (XCHARSET (cs)) #define XCHARSET_CHARS(cs) CHARSET_CHARS (XCHARSET (cs)) +#define XCHARSET_BYTE_SIZE(cs) CHARSET_BYTE_SIZE (XCHARSET (cs)) #define XCHARSET_REVERSE_DIRECTION_CHARSET(cs) \ CHARSET_REVERSE_DIRECTION_CHARSET (XCHARSET (cs)) #define XCHARSET_DECODING_TABLE(cs) CHARSET_DECODING_TABLE(XCHARSET(cs)) @@ -446,12 +442,16 @@ CHARSET_BY_ATTRIBUTES (int chars, int dimension, int final, int dir) #define MIN_CHAR_SIP 0x020000 #define MAX_CHAR_SIP 0x02FFFF +#if 0 #define MIN_CHAR_DAIKANWA 0x00E00000 #define MAX_CHAR_DAIKANWA (MIN_CHAR_DAIKANWA + 50100) /* 0xE0FFFF */ + #define MIN_CHAR_CBETA 0x00E20000 #define MAX_CHAR_CBETA 0x00E2FFFF + #define MIN_CHAR_CHINA3_JEF 0x00E80000 #define MAX_CHAR_CHINA3_JEF 0x00E8FFFF +#endif #define MIN_CHAR_94 0x00E90940 #define MAX_CHAR_94 (MIN_CHAR_94 + 94 * 80 - 1) @@ -465,10 +465,13 @@ CHARSET_BY_ATTRIBUTES (int chars, int dimension, int final, int dir) #define MIN_CHAR_96x96 0xF4C000 #define MAX_CHAR_96x96 (MIN_CHAR_96x96 + 96 * 96 * 80 - 1) +/* #define MIN_CHAR_GT 0x61000000 -#define MAX_CHAR_GT (MIN_CHAR_GT + 66773) +#define MAX_CHAR_GT (MIN_CHAR_GT + 67547) +*/ #define MIN_CHAR_BIG5_CDP 0x62000000 #define MAX_CHAR_BIG5_CDP 0x6200FFFF +/* #define MIN_CHAR_HANZIKU_1 (0x62000000 + 65536 * 1) #define MAX_CHAR_HANZIKU_1 (0x62000000 + 65536 * 1 + 65535) #define MIN_CHAR_HANZIKU_2 (0x62000000 + 65536 * 2) @@ -493,48 +496,130 @@ CHARSET_BY_ATTRIBUTES (int chars, int dimension, int final, int dir) #define MAX_CHAR_HANZIKU_11 (0x62000000 + 65536 * 11 + 65535) #define MIN_CHAR_HANZIKU_12 (0x62000000 + 65536 * 12) #define MAX_CHAR_HANZIKU_12 (0x62000000 + 65536 * 12 + 65535) +*/ Emchar decode_builtin_char (Lisp_Object charset, int code_point); -extern Lisp_Object Vcharset_chinese_big5; -extern Lisp_Object Vcharset_chinese_big5_1; -extern Lisp_Object Vcharset_chinese_big5_2; - -INLINE_HEADER Emchar -DECODE_DEFINED_CHAR (Lisp_Object charset, int code_point); -INLINE_HEADER Emchar -DECODE_DEFINED_CHAR (Lisp_Object charset, int code_point) +INLINE_HEADER Lisp_Object +get_ccs_octet_table (Lisp_Object table, Lisp_Object ccs, int code); +INLINE_HEADER Lisp_Object +get_ccs_octet_table (Lisp_Object table, Lisp_Object ccs, int code) { - int dim = XCHARSET_DIMENSION (charset); - Lisp_Object decoding_table = XCHARSET_DECODING_TABLE (charset); - int idx; - Lisp_Object ch; + int byte_offset = XCHARSET_BYTE_OFFSET (ccs); - while (dim > 0) + if (VECTORP (table)) { - dim--; - if ( VECTORP (decoding_table) - && ( 0 <= (idx = ((code_point >> (dim * 8)) - & 255) - XCHARSET_BYTE_OFFSET (charset)) ) - && ( idx < XVECTOR_LENGTH (decoding_table) ) - && !NILP (ch = XVECTOR_DATA(decoding_table)[idx]) ) - { - if (CHARP (ch)) - return XCHAR (ch); - else - decoding_table = ch; - } + int idx = code - byte_offset; + + if (idx < XVECTOR_LENGTH(table)) + return XVECTOR_DATA(table)[idx]; else - break; + return Qunbound; + } + else + return table; +} + +INLINE_HEADER Lisp_Object +put_ccs_octet_table (Lisp_Object table, Lisp_Object ccs, int code, + Lisp_Object value); +INLINE_HEADER Lisp_Object +put_ccs_octet_table (Lisp_Object table, Lisp_Object ccs, int code, + Lisp_Object value) +{ + int byte_offset = XCHARSET_BYTE_OFFSET (ccs); + int ccs_len = XCHARSET_BYTE_SIZE (ccs); + + if (VECTORP (table)) + { + XVECTOR_DATA(table)[code - byte_offset] = value; + return table; + } + else if (EQ (table, value)) + return table; + else + { + table = make_vector (ccs_len, table); + XVECTOR_DATA(table)[code - byte_offset] = value; + return table; + } +} + +INLINE_HEADER void +decoding_table_put_char (Lisp_Object ccs, + int code_point, Lisp_Object character); +INLINE_HEADER void +decoding_table_put_char (Lisp_Object ccs, + int code_point, Lisp_Object character) +{ + Lisp_Object table1 = XCHARSET_DECODING_TABLE (ccs); + int dim = XCHARSET_DIMENSION (ccs); + + if (dim == 1) + XCHARSET_DECODING_TABLE (ccs) + = put_ccs_octet_table (table1, ccs, code_point, character); + else if (dim == 2) + { + Lisp_Object table2 + = get_ccs_octet_table (table1, ccs, (unsigned char)(code_point >> 8)); + + table2 = put_ccs_octet_table (table2, ccs, + (unsigned char)code_point, character); + XCHARSET_DECODING_TABLE (ccs) + = put_ccs_octet_table (table1, ccs, + (unsigned char)(code_point >> 8), table2); + } + else if (dim == 3) + { + Lisp_Object table2 + = get_ccs_octet_table (table1, ccs, (unsigned char)(code_point >> 16)); + Lisp_Object table3 + = get_ccs_octet_table (table2, ccs, (unsigned char)(code_point >> 8)); + + table3 = put_ccs_octet_table (table3, ccs, + (unsigned char)code_point, character); + table2 = put_ccs_octet_table (table2, ccs, + (unsigned char)(code_point >> 8), table3); + XCHARSET_DECODING_TABLE (ccs) + = put_ccs_octet_table (table1, ccs, + (unsigned char)(code_point >> 16), table2); + } + else /* if (dim == 4) */ + { + Lisp_Object table2 + = get_ccs_octet_table (table1, ccs, (unsigned char)(code_point >> 24)); + Lisp_Object table3 + = get_ccs_octet_table (table2, ccs, (unsigned char)(code_point >> 16)); + Lisp_Object table4 + = get_ccs_octet_table (table3, ccs, (unsigned char)(code_point >> 8)); + + table4 = put_ccs_octet_table (table4, ccs, + (unsigned char)code_point, character); + table3 = put_ccs_octet_table (table3, ccs, + (unsigned char)(code_point >> 8), table4); + table2 = put_ccs_octet_table (table2, ccs, + (unsigned char)(code_point >> 16), table3); + XCHARSET_DECODING_TABLE (ccs) + = put_ccs_octet_table (table1, ccs, + (unsigned char)(code_point >> 24), table2); } - return -1; } +INLINE_HEADER void +decoding_table_remove_char (Lisp_Object ccs, int code_point); +INLINE_HEADER void +decoding_table_remove_char (Lisp_Object ccs, int code_point) +{ + decoding_table_put_char (ccs, code_point, Qunbound); +} + +Emchar decode_defined_char (Lisp_Object charset, int code_point); + INLINE_HEADER Emchar DECODE_CHAR (Lisp_Object charset, int code_point); INLINE_HEADER Emchar DECODE_CHAR (Lisp_Object charset, int code_point) { - Emchar char_id = DECODE_DEFINED_CHAR (charset, code_point); + Emchar char_id = decode_defined_char (charset, code_point); if (char_id >= 0) return char_id; @@ -583,7 +668,7 @@ MAKE_CHAR (Lisp_Object charset, int c1, int c2) extern Lisp_Object Vcharacter_attribute_table; int encode_builtin_char_1 (Emchar c, Lisp_Object* charset); -int charset_code_point (Lisp_Object charset, Emchar ch); +int charset_code_point (Lisp_Object charset, Emchar ch, int defined_only); int range_charset_code_point (Lisp_Object charset, Emchar ch); extern Lisp_Object Vdefault_coded_charset_priority_list; @@ -600,7 +685,7 @@ encode_char_1 (Emchar ch, Lisp_Object* charset) *charset = Ffind_charset (Fcar (charsets)); if (!NILP (*charset)) { - int code_point = charset_code_point (*charset, ch); + int code_point = charset_code_point (*charset, ch, 0); if (code_point >= 0) return code_point; @@ -674,21 +759,4 @@ CHAR_TO_CHARC (Emchar ch) return cc; } - -/************************************************************************/ -/* Exported functions */ -/************************************************************************/ - -EXFUN (Fget_charset, 1); - -extern Lisp_Object Qucs; - -extern Lisp_Object Vcharset_japanese_jisx0208; -extern Lisp_Object Vcharset_japanese_jisx0208_1990; -extern Lisp_Object Vcharset_japanese_jisx0212; - -Lisp_Object put_char_ccs_code_point (Lisp_Object character, - Lisp_Object ccs, Lisp_Object value); -Lisp_Object remove_char_ccs (Lisp_Object character, Lisp_Object ccs); - #endif /* INCLUDED_char_ucs_h_ */