X-Git-Url: http://git.chise.org/gitweb/?p=chise%2Fxemacs-chise.git.1;a=blobdiff_plain;f=src%2Fchar-ucs.h;h=0c2f6ee6c23c932be5398ff4656352e5285f0608;hp=9d59f2135e7a7591aaa6c25982980d73dcfd2954;hb=414b512c0774e67ba8e160b605447d862d3be166;hpb=04330c2385f72b8bb040f4b3dcb159c66228db83 diff --git a/src/char-ucs.h b/src/char-ucs.h index 9d59f21..0c2f6ee 100644 --- a/src/char-ucs.h +++ b/src/char-ucs.h @@ -1,5 +1,5 @@ /* Header for UCS-4 character representation. - Copyright (C) 1999,2000,2001,2002,2003 MORIOKA Tomohiko + Copyright (C) 1999,2000,2001,2002,2003,2004,2008,2009,2017 MORIOKA Tomohiko This file is part of XEmacs. @@ -48,7 +48,9 @@ extern Lisp_Object Vcharset_japanese_jisx0212; EXFUN (Fget_charset, 1); -extern Lisp_Object Qmap_ucs, Qucs; +extern Lisp_Object Qsystem_char_id; +extern Lisp_Object Qrep_ucs, Qucs; +extern Lisp_Object Q_subsumptive, Q_denotational; Lisp_Object put_char_ccs_code_point (Lisp_Object character, Lisp_Object ccs, Lisp_Object value); @@ -62,119 +64,121 @@ Lisp_Object remove_char_ccs (Lisp_Object character, Lisp_Object ccs); typedef short Charset_ID; /* ISO/IEC 10646 */ -#define LEADING_BYTE_UCS -177 +#define LEADING_BYTE_UCS (-177) /* represent normal 80-9F */ -#define LEADING_BYTE_CONTROL_1 -77 +#define LEADING_BYTE_CONTROL_1 (-77) /* ISO/IEC 10646 BMP */ -#define LEADING_BYTE_UCS_BMP -176 +#define LEADING_BYTE_UCS_BMP (-176) -/* ISO/IEC 10646 SMP */ -#define LEADING_BYTE_UCS_SMP 1 - -/* ISO/IEC 10646 SIP */ -#define LEADING_BYTE_UCS_SIP 2 +#define LEADING_BYTE_SYSTEM_CHAR_ID 0 /* Japanese JIS X0208 Common 2/4 2/{(8),9,10,11} 4/2 (B) */ -#define LEADING_BYTE_JIS_X0208 3 +#define LEADING_BYTE_JIS_X0208 1 /* Chinese GB 12345-1990 */ -#define LEADING_BYTE_CHINESE_GB12345 4 +#define LEADING_BYTE_CHINESE_GB12345 2 -#define LEADING_BYTE_CHINESE_BIG5 5 +#define LEADING_BYTE_CHINESE_BIG5 3 /* Big5 Level 1 2/4 2/{(8),9,10,11} 4/0 '0' */ -#define LEADING_BYTE_CHINESE_BIG5_1 6 +#define LEADING_BYTE_CHINESE_BIG5_1 4 /* Big5 Level 2 2/4 2/{(8),9,10,11} 
4/0 '1' */ -#define LEADING_BYTE_CHINESE_BIG5_2 7 +#define LEADING_BYTE_CHINESE_BIG5_2 5 /* VISCII 1.1 */ -#define LEADING_BYTE_LATIN_VISCII 8 +#define LEADING_BYTE_LATIN_VISCII 6 /* MULE VISCII-LOWER (CHARSET_ID_OFFSET_96 + '1') */ -#define LEADING_BYTE_LATIN_VISCII_LOWER 9 +#define LEADING_BYTE_LATIN_VISCII_LOWER 7 /* MULE VISCII-UPPER (CHARSET_ID_OFFSET_96 + '2') */ -#define LEADING_BYTE_LATIN_VISCII_UPPER 10 +#define LEADING_BYTE_LATIN_VISCII_UPPER 8 -#define LEADING_BYTE_ETHIOPIC_UCS 11 +#define LEADING_BYTE_ETHIOPIC_UCS 9 + +/* ISO/IEC 10646 SMP */ +#define LEADING_BYTE_UCS_SMP 10 + +/* ISO/IEC 10646 SIP */ +#define LEADING_BYTE_UCS_SIP 11 #define MIN_LEADING_BYTE_PRIVATE 12 -#define MAX_LEADING_BYTE_PRIVATE 512 +#define MAX_LEADING_BYTE_PRIVATE 1024 /* ISO 646 IRV */ -#define LEADING_BYTE_ASCII -6 /* (CHARSET_ID_OFFSET_94 + 'B') */ +#define LEADING_BYTE_ASCII ( -6) /* (CHARSET_ID_OFFSET_94 + 'B') */ /* Right half of JIS X0201-1976 */ -#define LEADING_BYTE_KATAKANA_JISX0201 -13 /* (CHARSET_ID_OFFSET_94 + 'I') */ +#define LEADING_BYTE_KATAKANA_JISX0201 ( -13) /* (CHARSET_ID_OFFSET_94 + 'I') */ /* Left half of JIS X0201-1976 */ -#define LEADING_BYTE_LATIN_JISX0201 -14 /* (CHARSET_ID_OFFSET_94 + 'J') */ +#define LEADING_BYTE_LATIN_JISX0201 ( -14) /* (CHARSET_ID_OFFSET_94 + 'J') */ /* Right half of ISO 8859-1 */ -#define LEADING_BYTE_LATIN_ISO8859_1 -100 /* (CHARSET_ID_OFFSET_96 + 'A') */ +#define LEADING_BYTE_LATIN_ISO8859_1 (-100) /* (CHARSET_ID_OFFSET_96 + 'A') */ /* Right half of ISO 8859-2 */ -#define LEADING_BYTE_LATIN_ISO8859_2 -101 /* (CHARSET_ID_OFFSET_96 + 'B') */ +#define LEADING_BYTE_LATIN_ISO8859_2 (-101) /* (CHARSET_ID_OFFSET_96 + 'B') */ /* Right half of ISO 8859-3 */ -#define LEADING_BYTE_LATIN_ISO8859_3 -109 /* (CHARSET_ID_OFFSET_96 + 'C') */ +#define LEADING_BYTE_LATIN_ISO8859_3 (-109) /* (CHARSET_ID_OFFSET_96 + 'C') */ /* Right half of ISO 8859-4 */ -#define LEADING_BYTE_LATIN_ISO8859_4 -110 /* (CHARSET_ID_OFFSET_96 + 'D') */ +#define 
LEADING_BYTE_LATIN_ISO8859_4 (-110) /* (CHARSET_ID_OFFSET_96 + 'D') */ /* Right half of ISO 8859-7 */ -#define LEADING_BYTE_GREEK_ISO8859_7 -126 /* (CHARSET_ID_OFFSET_96 + 'F') */ +#define LEADING_BYTE_GREEK_ISO8859_7 (-126) /* (CHARSET_ID_OFFSET_96 + 'F') */ /* Right half of ISO 8859-6 */ -#define LEADING_BYTE_ARABIC_ISO8859_6 -127 /* (CHARSET_ID_OFFSET_96 + 'G') */ +#define LEADING_BYTE_ARABIC_ISO8859_6 (-127) /* (CHARSET_ID_OFFSET_96 + 'G') */ /* Right half of ISO 8859-8 */ -#define LEADING_BYTE_HEBREW_ISO8859_8 -138 /* (CHARSET_ID_OFFSET_96 + 'H') */ +#define LEADING_BYTE_HEBREW_ISO8859_8 (-138) /* (CHARSET_ID_OFFSET_96 + 'H') */ /* Right half of ISO 8859-5 */ -#define LEADING_BYTE_CYRILLIC_ISO8859_5 -144 /* (CHARSET_ID_OFFSET_96 + 'L') */ +#define LEADING_BYTE_CYRILLIC_ISO8859_5 (-144) /* (CHARSET_ID_OFFSET_96 + 'L') */ /* Right half of ISO 8859-9 */ -#define LEADING_BYTE_LATIN_ISO8859_9 -148 /* (CHARSET_ID_OFFSET_96 + 'M') */ +#define LEADING_BYTE_LATIN_ISO8859_9 (-148) /* (CHARSET_ID_OFFSET_96 + 'M') */ /* TIS620-2533 */ -#define LEADING_BYTE_THAI_TIS620 -166 /* (CHARSET_ID_OFFSET_96 + 'T') */ +#define LEADING_BYTE_THAI_TIS620 (-166) /* (CHARSET_ID_OFFSET_96 + 'T') */ /* Right-hand Part of the VSCII-2 (TCVN 5712:1983) */ -#define LEADING_BYTE_LATIN_TCVN5712 -180 /* F = 5/10 0x5A `Z' */ +#define LEADING_BYTE_LATIN_TCVN5712 (-180) /* F = 5/10 0x5A `Z' */ /* Japanese JIS X0208-1978 2/4 2/{(8),9,10,11} 4/0 (@) */ -#define LEADING_BYTE_JAPANESE_JISX0208_1978 -42 +#define LEADING_BYTE_JAPANESE_JISX0208_1978 ( -42) /* Chinese Hanzi GB2312-1980 2/4 2/{(8),9,10,11} 4/1 (A) */ -#define LEADING_BYTE_CHINESE_GB2312 -58 +#define LEADING_BYTE_CHINESE_GB2312 ( -58) /* Japanese JIS X0208-1983 2/4 2/{(8),9,10,11} 4/2 (B) */ -#define LEADING_BYTE_JAPANESE_JISX0208 -87 +#define LEADING_BYTE_JAPANESE_JISX0208 ( -87) /* Japanese JIS X0208-1990 2/4 2/{(8),9,10,11} 4/2 (B) */ -#define LEADING_BYTE_JAPANESE_JISX0208_1990 -168 +#define LEADING_BYTE_JAPANESE_JISX0208_1990 (-168) /* 
Hangul KS C5601-1987 2/4 2/{8,9,10,11} 4/3 (C) */ -#define LEADING_BYTE_KOREAN_KSC5601 -149 +#define LEADING_BYTE_KOREAN_KSC5601 (-149) /* Japanese JIS X0212-1990 2/4 2/{8,9,10,11} 4/4 (D) */ -#define LEADING_BYTE_JAPANESE_JISX0212 -159 +#define LEADING_BYTE_JAPANESE_JISX0212 (-159) /* CCITT Extended GB 2/4 2/{8,9,10,11} 4/5 (E) */ -#define LEADING_BYTE_CHINESE_CCITT_GB -165 +#define LEADING_BYTE_CHINESE_CCITT_GB (-165) /* Chinese CNS11643 Set 1 2/4 2/{8,9,10,11} 4/7 (G) */ -#define LEADING_BYTE_CHINESE_CNS11643_1 -171 +#define LEADING_BYTE_CHINESE_CNS11643_1 (-171) /* Chinese CNS11643 Set 2 2/4 2/{8,9,10,11} 4/8 (H) */ -#define LEADING_BYTE_CHINESE_CNS11643_2 -172 +#define LEADING_BYTE_CHINESE_CNS11643_2 (-172) /* Chinese CNS11643 Set 3 2/4 2/{8,9,10,11} 4/9 (I) */ #define LEADING_BYTE_CHINESE_CNS11643_3 -183 @@ -219,6 +223,8 @@ struct Lisp_Charset Lisp_Object ccl_program; + int iso_ir; /* NOTE(review): presumably the ISO-IR registration number of the charset -- confirm */ + /* Final byte of this character set in ISO2022 designating escape sequence */ Bufbyte final; @@ -277,6 +283,7 @@ DECLARE_LRECORD (charset, Lisp_Charset); #define CHARSET_COLUMNS(cs) ((cs)->columns) #define CHARSET_GRAPHIC(cs) ((cs)->graphic) #define CHARSET_DIRECTION(cs) ((cs)->direction) +#define CHARSET_ISO_IR(cs) ((cs)->iso_ir) #define CHARSET_FINAL(cs) ((cs)->final) #define CHARSET_DOC_STRING(cs) ((cs)->doc_string) #define CHARSET_REGISTRY(cs) ((cs)->registry) @@ -492,6 +499,10 @@ CHARSET_BY_ATTRIBUTES (int chars, int dimension, int final, int dir) #define MAX_CHAR_HANZIKU_12 (0x62000000 + 65536 * 12 + 65535) */ +#define CHAR_ISOLATED_ONLY (-1) /* parenthesized, like the negative LEADING_BYTE_* values, so the macro expands safely in any expression */ +#define CHAR_ALL 0 +#define CHAR_DEFINED_ONLY 1 + Emchar decode_builtin_char (Lisp_Object charset, int code_point); INLINE_HEADER Lisp_Object @@ -539,95 +550,9 @@ put_ccs_octet_table (Lisp_Object table, Lisp_Object ccs, int code, } } -INLINE_HEADER void +void decoding_table_put_char (Lisp_Object ccs, int code_point, Lisp_Object character); -INLINE_HEADER void -decoding_table_put_char (Lisp_Object ccs, - int code_point, Lisp_Object 
character) -{ -#if 1 - Lisp_Object table1 = XCHARSET_DECODING_TABLE (ccs); - int dim = XCHARSET_DIMENSION (ccs); - - if (dim == 1) - XCHARSET_DECODING_TABLE (ccs) - = put_ccs_octet_table (table1, ccs, code_point, character); - else if (dim == 2) - { - Lisp_Object table2 - = get_ccs_octet_table (table1, ccs, (unsigned char)(code_point >> 8)); - - table2 = put_ccs_octet_table (table2, ccs, - (unsigned char)code_point, character); - XCHARSET_DECODING_TABLE (ccs) - = put_ccs_octet_table (table1, ccs, - (unsigned char)(code_point >> 8), table2); - } - else if (dim == 3) - { - Lisp_Object table2 - = get_ccs_octet_table (table1, ccs, (unsigned char)(code_point >> 16)); - Lisp_Object table3 - = get_ccs_octet_table (table2, ccs, (unsigned char)(code_point >> 8)); - - table3 = put_ccs_octet_table (table3, ccs, - (unsigned char)code_point, character); - table2 = put_ccs_octet_table (table2, ccs, - (unsigned char)(code_point >> 8), table3); - XCHARSET_DECODING_TABLE (ccs) - = put_ccs_octet_table (table1, ccs, - (unsigned char)(code_point >> 16), table2); - } - else /* if (dim == 4) */ - { - Lisp_Object table2 - = get_ccs_octet_table (table1, ccs, (unsigned char)(code_point >> 24)); - Lisp_Object table3 - = get_ccs_octet_table (table2, ccs, (unsigned char)(code_point >> 16)); - Lisp_Object table4 - = get_ccs_octet_table (table3, ccs, (unsigned char)(code_point >> 8)); - - table4 = put_ccs_octet_table (table4, ccs, - (unsigned char)code_point, character); - table3 = put_ccs_octet_table (table3, ccs, - (unsigned char)(code_point >> 8), table4); - table2 = put_ccs_octet_table (table2, ccs, - (unsigned char)(code_point >> 16), table3); - XCHARSET_DECODING_TABLE (ccs) - = put_ccs_octet_table (table1, ccs, - (unsigned char)(code_point >> 24), table2); - } -#else - Lisp_Object v = XCHARSET_DECODING_TABLE (ccs); - int dim = XCHARSET_DIMENSION (ccs); - int byte_offset = XCHARSET_BYTE_OFFSET (ccs); - int i = -1; - Lisp_Object nv; - int ccs_len = XVECTOR_LENGTH (v); - - while (dim > 0) - 
{ - dim--; - i = ((code_point >> (8 * dim)) & 255) - byte_offset; - nv = XVECTOR_DATA(v)[i]; - if (dim > 0) - { - if (!VECTORP (nv)) - { - if (EQ (nv, character)) - return; - else - nv = (XVECTOR_DATA(v)[i] = make_vector (ccs_len, Qnil)); - } - v = nv; - } - else - break; - } - XVECTOR_DATA(v)[i] = character; -#endif -} INLINE_HEADER void decoding_table_remove_char (Lisp_Object ccs, int code_point); @@ -637,17 +562,20 @@ decoding_table_remove_char (Lisp_Object ccs, int code_point) decoding_table_put_char (ccs, code_point, Qunbound); } -#ifdef HAVE_CHISE_CLIENT +#ifdef HAVE_CHISE Emchar load_char_decoding_entry_maybe (Lisp_Object ccs, int code_point); #endif -Emchar decode_defined_char (Lisp_Object charset, int code_point); +Emchar decode_defined_char (Lisp_Object charset, int code_point, + int without_inheritance); -INLINE_HEADER Emchar DECODE_CHAR (Lisp_Object charset, int code_point); INLINE_HEADER Emchar -DECODE_CHAR (Lisp_Object charset, int code_point) +DECODE_CHAR (Lisp_Object charset, int code_point, int without_inheritance); +INLINE_HEADER Emchar +DECODE_CHAR (Lisp_Object charset, int code_point, int without_inheritance) { - Emchar char_id = decode_defined_char (charset, code_point); + Emchar char_id + = decode_defined_char (charset, code_point, without_inheritance); /* flag is forwarded unchanged */ if (char_id >= 0) return char_id; @@ -663,18 +591,22 @@ INLINE_HEADER Emchar MAKE_CHAR (Lisp_Object charset, int c1, int c2) { if (XCHARSET_DIMENSION (charset) == 1) - return DECODE_CHAR (charset, c1); + return DECODE_CHAR (charset, c1, 0); /* without_inheritance = 0 */ else - return DECODE_CHAR (charset, (c1 << 8) | c2); + return DECODE_CHAR (charset, (c1 << 8) | c2, 0); /* without_inheritance = 0 */ } extern Lisp_Object Vcharacter_attribute_table; +int encode_char_2 (Emchar ch, Lisp_Object* charset); int encode_builtin_char_1 (Emchar c, Lisp_Object* charset); int charset_code_point (Lisp_Object charset, Emchar ch, int defined_only); int range_charset_code_point (Lisp_Object charset, Emchar ch); extern Lisp_Object Vdefault_coded_charset_priority_list; 
+extern Lisp_Object Vdisplay_coded_charset_priority_use_inheritance; +extern Lisp_Object Vdisplay_coded_charset_priority_use_hierarchy_order; + EXFUN (Ffind_charset, 1); INLINE_HEADER int encode_char_1 (Emchar ch, Lisp_Object* charset); @@ -700,6 +632,35 @@ encode_char_1 (Emchar ch, Lisp_Object* charset) return encode_builtin_char_1 (ch, charset); } +INLINE_HEADER int +encode_char_2_search_children (Emchar ch, Lisp_Object* charset); +INLINE_HEADER int /* Find a code point in *CHARSET for CH through its related characters: each element of CH's Q_subsumptive attribute list is tried first, then each element of its Q_denotational attribute list. Returns the first non-negative charset_code_point (*charset, element, 0) result, or -1 when no element yields one. *CHARSET is only read, never written. */ +encode_char_2_search_children (Emchar ch, Lisp_Object* charset) +{ + int code_point; + Lisp_Object rest; + + rest = Fget_char_attribute (make_char (ch), Q_subsumptive, Qnil); + for ( ; CONSP (rest); rest = XCDR (rest) ) /* CONSP, not !NILP: stop safely on a non-list value or dotted tail */ + { + Lisp_Object c = XCAR (rest); + + code_point = CHARP (c) ? charset_code_point (*charset, XCHAR (c), 0) : -1; /* skip elements that are not characters */ + if (code_point >= 0) + return code_point; + } + rest = Fget_char_attribute (make_char (ch), Q_denotational, Qnil); + for ( ; CONSP (rest); rest = XCDR (rest) ) /* same guard as the first loop */ + { + Lisp_Object c = XCAR (rest); + + code_point = CHARP (c) ? charset_code_point (*charset, XCHAR (c), 0) : -1; /* skip elements that are not characters */ + if (code_point >= 0) + return code_point; + } + return -1; +} + #define ENCODE_CHAR(ch, charset) encode_char_1 (ch, &(charset)) INLINE_HEADER void @@ -758,7 +719,7 @@ CHAR_TO_CHARC (Emchar ch) { Charc cc; - cc.code_point = encode_char_1 (ch, &cc.charset); + cc.code_point = encode_char_2 (ch, &cc.charset); return cc; }