X-Git-Url: http://git.chise.org/gitweb/?a=blobdiff_plain;f=src%2Fmule-charset.c;h=f184a29a505744f2b81f80d5a509d92fe71a3a23;hb=112b4eb6a71cd613a534b4246b4cc024ed9bf22e;hp=db53d9366ff385d726ad206177b721331891303a;hpb=ce39a01b94f78d2106be9de60565925955c66c84;p=chise%2Fxemacs-chise.git- diff --git a/src/mule-charset.c b/src/mule-charset.c index db53d93..f184a29 100644 --- a/src/mule-charset.c +++ b/src/mule-charset.c @@ -1,7 +1,7 @@ /* Functions to handle multilingual characters. Copyright (C) 1992, 1995 Free Software Foundation, Inc. Copyright (C) 1995 Sun Microsystems, Inc. - Copyright (C) 1999,2000,2001 MORIOKA Tomohiko + Copyright (C) 1999,2000,2001,2002 MORIOKA Tomohiko This file is part of XEmacs. @@ -77,7 +77,7 @@ Lisp_Object Vcharset_latin_tcvn5712; Lisp_Object Vcharset_latin_viscii_lower; Lisp_Object Vcharset_latin_viscii_upper; Lisp_Object Vcharset_chinese_big5; -Lisp_Object Vcharset_chinese_big5_cdp; +/* Lisp_Object Vcharset_chinese_big5_cdp; */ Lisp_Object Vcharset_ideograph_hanziku_1; Lisp_Object Vcharset_ideograph_hanziku_2; Lisp_Object Vcharset_ideograph_hanziku_3; @@ -176,20 +176,6 @@ const Bytecount rep_bytes_by_first_byte[0xA0] = #ifdef UTF2000 -INLINE_HEADER int CHARSET_BYTE_SIZE (Lisp_Charset* cs); -INLINE_HEADER int -CHARSET_BYTE_SIZE (Lisp_Charset* cs) -{ - /* ad-hoc method for `ascii' */ - if ((CHARSET_CHARS (cs) == 94) && - (CHARSET_BYTE_OFFSET (cs) != 33)) - return 128 - CHARSET_BYTE_OFFSET (cs); - else - return CHARSET_CHARS (cs); -} - -#define XCHARSET_BYTE_SIZE(ccs) CHARSET_BYTE_SIZE (XCHARSET (ccs)) - int decoding_table_check_elements (Lisp_Object v, int dim, int ccs_len); int decoding_table_check_elements (Lisp_Object v, int dim, int ccs_len) @@ -218,58 +204,6 @@ decoding_table_check_elements (Lisp_Object v, int dim, int ccs_len) return 0; } -INLINE_HEADER void -decoding_table_remove_char (Lisp_Object v, int dim, int byte_offset, - int code_point); -INLINE_HEADER void -decoding_table_remove_char (Lisp_Object v, int dim, int byte_offset, - int code_point) -{ - int i = -1; - - while (dim > 0) - { - Lisp_Object nv; - - dim--; - i = ((code_point >> (8 * dim)) & 255) - byte_offset; - nv = XVECTOR_DATA(v)[i]; - if (!VECTORP (nv)) - break; - v = nv; - } - if (i >= 0) - XVECTOR_DATA(v)[i] = Qnil; -} - -INLINE_HEADER void -decoding_table_put_char (Lisp_Object v, int dim, int byte_offset, - int code_point, Lisp_Object character); -INLINE_HEADER void -decoding_table_put_char (Lisp_Object v, int dim, int byte_offset, - int code_point, Lisp_Object character) -{ - int i = -1; - Lisp_Object nv; - int ccs_len = XVECTOR_LENGTH (v); - - while (dim > 0) - { - dim--; - i = ((code_point >> (8 * dim)) & 255) - byte_offset; - nv = XVECTOR_DATA(v)[i]; - if (dim > 0) - { - if (!VECTORP (nv)) - nv = (XVECTOR_DATA(v)[i] = make_vector (ccs_len, Qnil)); - v = nv; - } - else - break; - } - XVECTOR_DATA(v)[i] = character; -} - Lisp_Object put_char_ccs_code_point (Lisp_Object character, Lisp_Object ccs, Lisp_Object value) @@ -279,9 +213,6 @@ put_char_ccs_code_point (Lisp_Object character, || (XCHAR (character) != XINT (value))) { Lisp_Object v = XCHARSET_DECODING_TABLE (ccs); - int dim = XCHARSET_DIMENSION (ccs); - int ccs_len = XCHARSET_BYTE_SIZE (ccs); - int byte_offset = XCHARSET_BYTE_OFFSET (ccs); int code_point; if (CONSP (value)) @@ -331,16 +262,10 @@ put_char_ccs_code_point (Lisp_Object character, Lisp_Object cpos = Fget_char_attribute (character, ccs, Qnil); if (INTP (cpos)) { - decoding_table_remove_char (v, dim, byte_offset, XINT (cpos)); + decoding_table_remove_char (ccs, XINT (cpos)); } } - else - { - XCHARSET_DECODING_TABLE (ccs) - = v = make_vector (ccs_len, Qnil); - } - - decoding_table_put_char (v, dim, byte_offset, code_point, character); + decoding_table_put_char (ccs, code_point, character); } return value; } @@ -357,15 +282,12 @@ remove_char_ccs (Lisp_Object character, Lisp_Object ccs) if (!NILP (cpos)) { - decoding_table_remove_char (decoding_table, - XCHARSET_DIMENSION (ccs), - XCHARSET_BYTE_OFFSET (ccs), - XINT (cpos)); + decoding_table_remove_char (ccs, XINT (cpos)); } } if (CHAR_TABLEP (encoding_table)) { - put_char_id_table (XCHAR_TABLE(encoding_table), character, Qnil); + put_char_id_table (XCHAR_TABLE(encoding_table), character, Qunbound); } return Qt; } @@ -385,8 +307,8 @@ Lisp_Object Qreverse_direction_charset; Lisp_Object Qleading_byte; Lisp_Object Qshort_name, Qlong_name; #ifdef UTF2000 -Lisp_Object Qmin_code, Qmax_code; -Lisp_Object Qmother, Qconversion, Q94x60; +Lisp_Object Qmin_code, Qmax_code, Qcode_offset; +Lisp_Object Qmother, Qconversion, Q94x60, Q94x94x60, Qbig5_1, Qbig5_2; #endif Lisp_Object Qascii, @@ -428,7 +350,7 @@ Lisp_Object Qascii, Qvietnamese_viscii_lower, Qvietnamese_viscii_upper, Qchinese_big5, - Qchinese_big5_cdp, + /* Qchinese_big5_cdp, */ Qideograph_hanziku_1, Qideograph_hanziku_2, Qideograph_hanziku_3, @@ -895,7 +817,7 @@ make_charset (Charset_ID id, Lisp_Object name, CHARSET_CCL_PROGRAM (cs) = Qnil; CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil; #ifdef UTF2000 - CHARSET_DECODING_TABLE(cs) = Qnil; + CHARSET_DECODING_TABLE(cs) = Qunbound; CHARSET_MIN_CODE (cs) = min_code; CHARSET_MAX_CODE (cs) = max_code; CHARSET_CODE_OFFSET (cs) = code_offset; @@ -984,11 +906,141 @@ get_unallocated_leading_byte (int dimension) #define BIG5_SAME_ROW (0xFF - 0xA1 + 0x7F - 0x40) Emchar +decode_defined_char (Lisp_Object ccs, int code_point) +{ + int dim = XCHARSET_DIMENSION (ccs); + Lisp_Object decoding_table = XCHARSET_DECODING_TABLE (ccs); + Emchar char_id = -1; + Lisp_Object mother; + + while (dim > 0) + { + dim--; + decoding_table + = get_ccs_octet_table (decoding_table, ccs, + (code_point >> (dim * 8)) & 255); + } + if (CHARP (decoding_table)) + return XCHAR (decoding_table); +#ifdef HAVE_DATABASE + if (EQ (decoding_table, Qunloaded) || + EQ (decoding_table, Qunbound) || + NILP (decoding_table) ) + { + char_id = load_char_decoding_entry_maybe (ccs, code_point); + } +#endif + if (char_id >= 0) + return char_id; + else if ( CHARSETP (mother = XCHARSET_MOTHER (ccs)) ) + { + if ( XCHARSET_CONVERSION (ccs) == CONVERSION_IDENTICAL ) + { + if ( EQ (mother, Vcharset_ucs) ) + return DECODE_CHAR (mother, code_point); + else + return decode_defined_char (mother, code_point); + } + else if ( XCHARSET_CONVERSION (ccs) == CONVERSION_BIG5_1 ) + { + unsigned int I + = (((code_point >> 8) & 0x7F) - 33) * 94 + + (( code_point & 0x7F) - 33); + unsigned char b1 = I / (0xFF - 0xA1 + 0x7F - 0x40) + 0xA1; + unsigned char b2 = I % (0xFF - 0xA1 + 0x7F - 0x40); + + b2 += b2 < 0x3F ? 0x40 : 0x62; + return decode_defined_char (mother, (b1 << 8) | b2); + } + else if ( XCHARSET_CONVERSION (ccs) == CONVERSION_BIG5_2 ) + { + unsigned int I + = (((code_point >> 8) & 0x7F) - 33) * 94 + + (( code_point & 0x7F) - 33) + + BIG5_SAME_ROW * (0xC9 - 0xA1); + unsigned char b1 = I / (0xFF - 0xA1 + 0x7F - 0x40) + 0xA1; + unsigned char b2 = I % (0xFF - 0xA1 + 0x7F - 0x40); + + b2 += b2 < 0x3F ? 0x40 : 0x62; + return decode_defined_char (mother, (b1 << 8) | b2); + } + } + return -1; +} + +Emchar decode_builtin_char (Lisp_Object charset, int code_point) { + Lisp_Object mother = XCHARSET_MOTHER (charset); int final; - if (EQ (charset, Vcharset_chinese_big5)) + if ( CHARSETP (mother) && (XCHARSET_MAX_CODE (charset) > 0) ) + { + int code = code_point; + + if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x60 ) + { + int row = code_point >> 8; + int cell = code_point & 255; + + if (row < 16 + 32) + return -1; + else if (row < 16 + 32 + 30) + code = (row - (16 + 32)) * 94 + cell - 33; + else if (row < 18 + 32 + 30) + return -1; + else if (row < 18 + 32 + 60) + code = (row - (18 + 32)) * 94 + cell - 33; + } + else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94x60 ) + { + int plane = code_point >> 16; + int row = (code_point >> 8) & 255; + int cell = code_point & 255; + + if (row < 16 + 32) + return -1; + else if (row < 16 + 32 + 30) + code + = (plane - 33) * 94 * 60 + + (row - (16 + 32)) * 94 + + cell - 33; + else if (row < 18 + 32 + 30) + return -1; + else if (row < 18 + 32 + 60) + code + = (plane - 33) * 94 * 60 + + (row - (18 + 32)) * 94 + + cell - 33; + } + else if ( XCHARSET_CONVERSION (charset) == CONVERSION_BIG5_1 ) + { + unsigned int I + = (((code_point >> 8) & 0x7F) - 33) * 94 + + (( code_point & 0x7F) - 33); + unsigned char b1 = I / (0xFF - 0xA1 + 0x7F - 0x40) + 0xA1; + unsigned char b2 = I % (0xFF - 0xA1 + 0x7F - 0x40); + + b2 += b2 < 0x3F ? 0x40 : 0x62; + code = (b1 << 8) | b2; + } + else if ( XCHARSET_CONVERSION (charset) == CONVERSION_BIG5_2 ) + { + unsigned int I + = (((code_point >> 8) & 0x7F) - 33) * 94 + + (( code_point & 0x7F) - 33) + + BIG5_SAME_ROW * (0xC9 - 0xA1); + unsigned char b1 = I / (0xFF - 0xA1 + 0x7F - 0x40) + 0xA1; + unsigned char b2 = I % (0xFF - 0xA1 + 0x7F - 0x40); + + b2 += b2 < 0x3F ? 0x40 : 0x62; + code = (b1 << 8) | b2; + } + return + decode_builtin_char (mother, code + XCHARSET_CODE_OFFSET(charset)); + } +#if 0 + else if (EQ (charset, Vcharset_chinese_big5)) { int c1 = code_point >> 8; int c2 = code_point & 0xFF; @@ -1014,6 +1066,7 @@ decode_builtin_char (Lisp_Object charset, int code_point) code_point = ((I / 94 + 33) << 8) | (I % 94 + 33); } } +#endif if ((final = XCHARSET_FINAL (charset)) >= '0') { if (XCHARSET_DIMENSION (charset) == 1) @@ -1061,7 +1114,7 @@ decode_builtin_char (Lisp_Object charset, int code_point) ((code_point >> 8) - XCHARSET_BYTE_OFFSET (charset)) * XCHARSET_CHARS (charset) + (code_point & 0xFF) - XCHARSET_BYTE_OFFSET (charset)) - - XCHARSET_CODE_OFFSET (charset) + XCHARSET_MIN_CODE (charset); + + XCHARSET_CODE_OFFSET (charset); if ((cid < XCHARSET_MIN_CODE (charset)) || (XCHARSET_MAX_CODE (charset) < cid)) return -1; @@ -1084,115 +1137,156 @@ charset_code_point (Lisp_Object charset, Emchar ch) else { Lisp_Object mother = XCHARSET_MOTHER (charset); + int min = XCHARSET_MIN_CODE (charset); + int max = XCHARSET_MAX_CODE (charset); + int code; if ( CHARSETP (mother) ) + code = charset_code_point (mother, ch); + else + code = ch; + if ( ((max == 0) && CHARSETP (mother)) || + ((min <= code) && (code <= max)) ) { - int min = XCHARSET_MIN_CODE (charset); - int max = XCHARSET_MAX_CODE (charset); - int code = charset_code_point (mother, ch); + int d = code - XCHARSET_CODE_OFFSET (charset); + + if ( XCHARSET_CONVERSION (charset) == CONVERSION_IDENTICAL ) + return d; + else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94 ) + return d + 33; + else if ( XCHARSET_CONVERSION (charset) == CONVERSION_96 ) + return d + 32; + else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x60 ) + { + int row = d / 94; + int cell = d % 94 + 33; - if ( (min <= code) && (code <= max) ) + if (row < 30) + row += 16 + 32; + else + row += 18 + 32; + return (row << 8) | cell; + } + else if ( XCHARSET_CONVERSION (charset) == CONVERSION_BIG5_1 ) { - if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x60 ) + int B1 = d >> 8, B2 = d & 0xFF; + unsigned int I + = (B1 - 0xA1) * BIG5_SAME_ROW + B2 + - (B2 < 0x7F ? 0x40 : 0x62); + + if (B1 < 0xC9) { - int m = code - min; - int row = m / 94; - int cell = m % 94 + 33; - - if (row < 30) - row += 16 + 32; - else - row += 18 + 32; - return (row << 8) | cell; + return ((I / 94 + 33) << 8) | (I % 94 + 33); } - else - return code - min + XCHARSET_CODE_OFFSET (charset); } - } - } - return range_charset_code_point (charset, ch); -} - -int -range_charset_code_point (Lisp_Object charset, Emchar ch) -{ - int d; - - if ((XCHARSET_MIN_CODE (charset) <= ch) - && (ch <= XCHARSET_MAX_CODE (charset))) - { - d = ch - XCHARSET_MIN_CODE (charset) + XCHARSET_CODE_OFFSET (charset); - - if (XCHARSET_CHARS (charset) == 256) - return d; - else if (XCHARSET_DIMENSION (charset) == 1) - return d + XCHARSET_BYTE_OFFSET (charset); - else if (XCHARSET_DIMENSION (charset) == 2) - return - ((d / XCHARSET_CHARS (charset) - + XCHARSET_BYTE_OFFSET (charset)) << 8) - | (d % XCHARSET_CHARS (charset) + XCHARSET_BYTE_OFFSET (charset)); - else if (XCHARSET_DIMENSION (charset) == 3) - return - ((d / (XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset)) - + XCHARSET_BYTE_OFFSET (charset)) << 16) - | ((d / XCHARSET_CHARS (charset) - % XCHARSET_CHARS (charset) - + XCHARSET_BYTE_OFFSET (charset)) << 8) - | (d % XCHARSET_CHARS (charset) + XCHARSET_BYTE_OFFSET (charset)); - else /* if (XCHARSET_DIMENSION (charset) == 4) */ - return - ((d / (XCHARSET_CHARS (charset) - * XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset)) - + XCHARSET_BYTE_OFFSET (charset)) << 24) - | ((d / (XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset)) - % XCHARSET_CHARS (charset) - + XCHARSET_BYTE_OFFSET (charset)) << 16) - | ((d / XCHARSET_CHARS (charset) % XCHARSET_CHARS (charset) - + XCHARSET_BYTE_OFFSET (charset)) << 8) - | (d % XCHARSET_CHARS (charset) + XCHARSET_BYTE_OFFSET (charset)); - } - else if (XCHARSET_CODE_OFFSET (charset) == 0) - { - if (XCHARSET_DIMENSION (charset) == 1) - { - if (XCHARSET_CHARS (charset) == 94) + else if ( XCHARSET_CONVERSION (charset) == CONVERSION_BIG5_2 ) { - if (((d = ch - (MIN_CHAR_94 - + (XCHARSET_FINAL (charset) - '0') * 94)) >= 0) - && (d < 94)) - return d + 33; + int B1 = d >> 8, B2 = d & 0xFF; + unsigned int I + = (B1 - 0xA1) * BIG5_SAME_ROW + B2 + - (B2 < 0x7F ? 0x40 : 0x62); + + if (B1 >= 0xC9) + { + I -= (BIG5_SAME_ROW) * (0xC9 - 0xA1); + return ((I / 94 + 33) << 8) | (I % 94 + 33); + } } - else if (XCHARSET_CHARS (charset) == 96) + else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94 ) + return ((d / 94 + 33) << 8) | (d % 94 + 33); + else if ( XCHARSET_CONVERSION (charset) == CONVERSION_96x96 ) + return ((d / 96 + 32) << 8) | (d % 96 + 32); + else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94x60 ) { - if (((d = ch - (MIN_CHAR_96 - + (XCHARSET_FINAL (charset) - '0') * 96)) >= 0) - && (d < 96)) - return d + 32; + int plane = d / (94 * 60) + 33; + int row = (d % (94 * 60)) / 94; + int cell = d % 94 + 33; + + if (row < 30) + row += 16 + 32; + else + row += 18 + 32; + return (plane << 16) | (row << 8) | cell; } + else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94x94 ) + return + ( (d / (94 * 94) + 33) << 16) + | ((d / 94 % 94 + 33) << 8) + | (d % 94 + 33); + else if ( XCHARSET_CONVERSION (charset) == CONVERSION_96x96x96 ) + return + ( (d / (96 * 96) + 32) << 16) + | ((d / 96 % 96 + 32) << 8) + | (d % 96 + 32); + else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94x94x94 ) + return + ( (d / (94 * 94 * 94) + 33) << 24) + | ((d / (94 * 94) % 94 + 33) << 16) + | ((d / 94 % 94 + 33) << 8) + | (d % 94 + 33); + else if ( XCHARSET_CONVERSION (charset) == CONVERSION_96x96x96x96 ) + return + ( (d / (96 * 96 * 96) + 32) << 24) + | ((d / (96 * 96) % 96 + 32) << 16) + | ((d / 96 % 96 + 32) << 8) + | (d % 96 + 32); else - return -1; + { + printf ("Unknown CCS-conversion %d is specified!", + XCHARSET_CONVERSION (charset)); + exit (-1); + } } - else if (XCHARSET_DIMENSION (charset) == 2) + else if ( (XCHARSET_CODE_OFFSET (charset) == 0) || + (XCHARSET_CODE_OFFSET (charset) + == XCHARSET_MIN_CODE (charset)) ) { - if (XCHARSET_CHARS (charset) == 94) + int d; + + if (XCHARSET_DIMENSION (charset) == 1) { - if (((d = ch - (MIN_CHAR_94x94 - + (XCHARSET_FINAL (charset) - '0') * 94 * 94)) - >= 0) - && (d < 94 * 94)) - return (((d / 94) + 33) << 8) | (d % 94 + 33); + if (XCHARSET_CHARS (charset) == 94) + { + if (((d = ch - (MIN_CHAR_94 + + (XCHARSET_FINAL (charset) - '0') * 94)) + >= 0) + && (d < 94)) + return d + 33; + } + else if (XCHARSET_CHARS (charset) == 96) + { + if (((d = ch - (MIN_CHAR_96 + + (XCHARSET_FINAL (charset) - '0') * 96)) + >= 0) + && (d < 96)) + return d + 32; + } + else + return -1; } - else if (XCHARSET_CHARS (charset) == 96) + else if (XCHARSET_DIMENSION (charset) == 2) { - if (((d = ch - (MIN_CHAR_96x96 - + (XCHARSET_FINAL (charset) - '0') * 96 * 96)) - >= 0) - && (d < 96 * 96)) - return (((d / 96) + 32) << 8) | (d % 96 + 32); + if (XCHARSET_CHARS (charset) == 94) + { + if (((d = ch - (MIN_CHAR_94x94 + + + (XCHARSET_FINAL (charset) - '0') * 94 * 94)) + >= 0) + && (d < 94 * 94)) + return (((d / 94) + 33) << 8) | (d % 94 + 33); + } + else if (XCHARSET_CHARS (charset) == 96) + { + if (((d = ch - (MIN_CHAR_96x96 + + + (XCHARSET_FINAL (charset) - '0') * 96 * 96)) + >= 0) + && (d < 96 * 96)) + return (((d / 96) + 32) << 8) | (d % 96 + 32); + } + else + return -1; } - else - return -1; } } return -1; @@ -1436,6 +1530,7 @@ character set. Recognized properties are: this character set. 'dimension Number of octets used to index a character in this charset. Either 1 or 2. Defaults to 1. + If UTF-2000 feature is enabled, 3 or 4 are also available. 'columns Number of columns used to display a character in this charset. Only used in TTY mode. (Under X, the actual width of a character can be derived from the font used to display the @@ -1444,6 +1539,7 @@ character set. Recognized properties are: 'chars Number of characters in each dimension (94 or 96). Defaults to 94. Note that if the dimension is 2, the character set thus described is 94x94 or 96x96. + If UTF-2000 feature is enabled, 128 or 256 are also available. 'final Final byte of ISO 2022 escape sequence. Must be supplied. Each combination of (DIMENSION, CHARS) defines a separate namespace for final bytes. Note that ISO @@ -1468,6 +1564,13 @@ character set. Recognized properties are: is passed the octets of the character, with the high bit cleared and set depending upon whether the value of the 'graphic property is 0 or 1. +'mother [UTF-2000 only] Base coded-charset. +'code-min [UTF-2000 only] Minimum code-point of a base coded-charset. +'code-max [UTF-2000 only] Maximum code-point of a base coded-charset. +'code-offset [UTF-2000 only] Offset for a code-point of a base + coded-charset. +'conversion [UTF-2000 only] Conversion for a code-point of a base + coded-charset (94x60, 94x94x60, big5-1 or big5-2). */ (name, doc_string, props)) { @@ -1477,8 +1580,8 @@ character set. Recognized properties are: Lisp_Object charset; Lisp_Object ccl_program = Qnil; Lisp_Object short_name = Qnil, long_name = Qnil; - int min_code = 0, max_code = 0; Lisp_Object mother = Qnil; + int min_code = 0, max_code = 0, code_offset = 0; int byte_offset = -1; int conversion = 0; @@ -1578,35 +1681,41 @@ character set. Recognized properties are: } #ifdef UTF2000 + else if (EQ (keyword, Qmother)) + { + mother = Fget_charset (value); + } + else if (EQ (keyword, Qmin_code)) { CHECK_INT (value); - min_code = XINT (value); - if (min_code < 0) - { - min_code = (~(-1 - min_code)) & 0x7FFFFFFF; - } + min_code = XUINT (value); } else if (EQ (keyword, Qmax_code)) { CHECK_INT (value); - max_code = XINT (value); - if (max_code < 0) - { - max_code = (~(-1 - max_code)) & 0x7FFFFFFF; - } + max_code = XUINT (value); } - else if (EQ (keyword, Qmother)) + else if (EQ (keyword, Qcode_offset)) { - mother = Fget_charset (value); + CHECK_INT (value); + code_offset = XUINT (value); } else if (EQ (keyword, Qconversion)) { if (EQ (value, Q94x60)) conversion = CONVERSION_94x60; + else if (EQ (value, Q94x94x60)) + conversion = CONVERSION_94x94x60; + else if (EQ (value, Qbig5_1)) + conversion = CONVERSION_BIG5_1; + else if (EQ (value, Qbig5_2)) + conversion = CONVERSION_BIG5_2; + else + signal_simple_error ("Unrecognized conversion", value); } #endif @@ -1670,7 +1779,7 @@ character set. Recognized properties are: charset = make_charset (id, name, chars, dimension, columns, graphic, final, direction, short_name, long_name, doc_string, registry, - Qnil, min_code, max_code, 0, byte_offset, + Qnil, min_code, max_code, code_offset, byte_offset, mother, conversion); if (!NILP (ccl_program)) XCHARSET_CCL_PROGRAM (charset) = ccl_program; @@ -2033,7 +2142,183 @@ Set mapping-table of CHARSET to TABLE. } return table; } + +DEFUN ("save-charset-mapping-table", Fsave_charset_mapping_table, 1, 1, 0, /* +Save mapping-table of CHARSET. +*/ + (charset)) +{ +#ifdef HAVE_DATABASE + struct Lisp_Charset *cs; + int byte_min, byte_max; + Lisp_Object db; + Lisp_Object db_file; + + charset = Fget_charset (charset); + cs = XCHARSET (charset); + + db_file = char_attribute_system_db_file (CHARSET_NAME (cs), + Qsystem_char_id, 1); + db = Fopen_database (db_file, Qnil, Qnil, build_string ("w+"), Qnil); + + byte_min = CHARSET_BYTE_OFFSET (cs); + byte_max = byte_min + CHARSET_BYTE_SIZE (cs); + switch (CHARSET_DIMENSION (cs)) + { + case 1: + { + Lisp_Object table_c = XCHARSET_DECODING_TABLE (charset); + int cell; + + for (cell = byte_min; cell < byte_max; cell++) + { + Lisp_Object c = get_ccs_octet_table (table_c, charset, cell); + + if (CHARP (c)) + Fput_database (Fprin1_to_string (make_int (cell), Qnil), + Fprin1_to_string (c, Qnil), + db, Qt); + } + } + break; + case 2: + { + Lisp_Object table_r = XCHARSET_DECODING_TABLE (charset); + int row; + + for (row = byte_min; row < byte_max; row++) + { + Lisp_Object table_c = get_ccs_octet_table (table_r, charset, row); + int cell; + + for (cell = byte_min; cell < byte_max; cell++) + { + Lisp_Object c = get_ccs_octet_table (table_c, charset, cell); + + if (CHARP (c)) + Fput_database (Fprin1_to_string (make_int ((row << 8) + | cell), + Qnil), + Fprin1_to_string (c, Qnil), + db, Qt); + } + } + } + break; + case 3: + { + Lisp_Object table_p = XCHARSET_DECODING_TABLE (charset); + int plane; + + for (plane = byte_min; plane < byte_max; plane++) + { + Lisp_Object table_r + = get_ccs_octet_table (table_p, charset, plane); + int row; + + for (row = byte_min; row < byte_max; row++) + { + Lisp_Object table_c + = get_ccs_octet_table (table_r, charset, row); + int cell; + + for (cell = byte_min; cell < byte_max; cell++) + { + Lisp_Object c = get_ccs_octet_table (table_c, charset, + cell); + + if (CHARP (c)) + Fput_database (Fprin1_to_string (make_int ((plane << 16) + | (row << 8) + | cell), + Qnil), + Fprin1_to_string (c, Qnil), + db, Qt); + } + } + } + } + break; + default: + { + Lisp_Object table_g = XCHARSET_DECODING_TABLE (charset); + int group; + + for (group = byte_min; group < byte_max; group++) + { + Lisp_Object table_p + = get_ccs_octet_table (table_g, charset, group); + int plane; + + for (plane = byte_min; plane < byte_max; plane++) + { + Lisp_Object table_r + = get_ccs_octet_table (table_p, charset, plane); + int row; + + for (row = byte_min; row < byte_max; row++) + { + Lisp_Object table_c + = get_ccs_octet_table (table_r, charset, row); + int cell; + + for (cell = byte_min; cell < byte_max; cell++) + { + Lisp_Object c + = get_ccs_octet_table (table_c, charset, cell); + + if (CHARP (c)) + Fput_database (Fprin1_to_string + (make_int (( group << 24) + | (plane << 16) + | (row << 8) + | cell), + Qnil), + Fprin1_to_string (c, Qnil), + db, Qt); + } + } + } + } + } + } + return Fclose_database (db); +#else + return Qnil; #endif +} + +#ifdef HAVE_CHISE_CLIENT +Emchar +load_char_decoding_entry_maybe (Lisp_Object ccs, int code_point) +{ + Lisp_Object db; + Lisp_Object db_file + = char_attribute_system_db_file (XCHARSET_NAME(ccs), Qsystem_char_id, + 0); + + db = Fopen_database (db_file, Qnil, Qnil, build_string ("r"), Qnil); + if (!NILP (db)) + { + Lisp_Object ret + = Fget_database (Fprin1_to_string (make_int (code_point), Qnil), + db, Qnil); + if (!NILP (ret)) + { + ret = Fread (ret); + if (CHARP (ret)) + { + decoding_table_put_char (ccs, code_point, ret); + Fclose_database (db); + return XCHAR (ret); + } + } + Fclose_database (db); + } + return -1; +} +#endif /* HAVE_CHISE_CLIENT */ +#endif /* UTF2000 */ /************************************************************************/ @@ -2058,7 +2343,7 @@ If corresponding character is not found, nil is returned. if (NILP (defined_only)) c = DECODE_CHAR (charset, c); else - c = DECODE_DEFINED_CHAR (charset, c); + c = decode_defined_char (charset, c); return c >= 0 ? make_char (c) : Qnil; } @@ -2201,7 +2486,7 @@ N defaults to 0 if omitted. DEFUN ("encode-char", Fencode_char, 2, 2, 0, /* Return code-point of CHARACTER in specified CHARSET. */ - (charset, character)) + (character, charset)) { int code_point; @@ -2372,6 +2657,7 @@ syms_of_mule_charset (void) DEFSUBR (Fdecode_char); DEFSUBR (Fdecode_builtin_char); DEFSUBR (Fencode_char); + DEFSUBR (Fsave_charset_mapping_table); #endif DEFSUBR (Fmake_char); DEFSUBR (Fchar_charset); @@ -2392,11 +2678,15 @@ syms_of_mule_charset (void) defsymbol (&Qshort_name, "short-name"); defsymbol (&Qlong_name, "long-name"); #ifdef UTF2000 + defsymbol (&Qmother, "mother"); defsymbol (&Qmin_code, "min-code"); defsymbol (&Qmax_code, "max-code"); - defsymbol (&Qmother, "mother"); + defsymbol (&Qcode_offset, "code-offset"); defsymbol (&Qconversion, "conversion"); defsymbol (&Q94x60, "94x60"); + defsymbol (&Q94x94x60, "94x94x60"); + defsymbol (&Qbig5_1, "big5-1"); + defsymbol (&Qbig5_2, "big5-2"); #endif defsymbol (&Ql2r, "l2r"); @@ -2457,7 +2747,7 @@ syms_of_mule_charset (void) defsymbol (&Qideograph_daikanwa_2, "ideograph-daikanwa-2"); defsymbol (&Qideograph_daikanwa, "ideograph-daikanwa"); defsymbol (&Qchinese_big5, "chinese-big5"); - defsymbol (&Qchinese_big5_cdp, "chinese-big5-cdp"); + /* defsymbol (&Qchinese_big5_cdp, "chinese-big5-cdp"); */ defsymbol (&Qideograph_hanziku_1, "ideograph-hanziku-1"); defsymbol (&Qideograph_hanziku_2, "ideograph-hanziku-2"); defsymbol (&Qideograph_hanziku_3, "ideograph-hanziku-3"); @@ -2551,7 +2841,7 @@ complex_vars_of_mule_charset (void) build_string ("UCS"), build_string ("ISO/IEC 10646"), build_string (""), - Qnil, 0, 0xFFFFFFF, 0, 0, Qnil, CONVERSION_IDENTICAL); + Qnil, 0, 0x7FFFFFFF, 0, 0, Qnil, CONVERSION_IDENTICAL); staticpro (&Vcharset_ucs_bmp); Vcharset_ucs_bmp = make_charset (LEADING_BYTE_UCS_BMP, Qucs_bmp, 256, 2, @@ -2570,8 +2860,8 @@ complex_vars_of_mule_charset (void) build_string ("UCS-SMP"), build_string ("ISO/IEC 10646 Group 0 Plane 1 (SMP)"), build_string ("UCS00-1"), - Qnil, MIN_CHAR_SMP, MAX_CHAR_SMP, 0, 0, - Qnil, CONVERSION_IDENTICAL); + Qnil, MIN_CHAR_SMP, MAX_CHAR_SMP, + MIN_CHAR_SMP, 0, Qnil, CONVERSION_IDENTICAL); staticpro (&Vcharset_ucs_sip); Vcharset_ucs_sip = make_charset (LEADING_BYTE_UCS_SIP, Qucs_sip, 256, 2, @@ -2580,8 +2870,8 @@ complex_vars_of_mule_charset (void) build_string ("UCS-SIP"), build_string ("ISO/IEC 10646 Group 0 Plane 2 (SIP)"), build_string ("\\(ISO10646.*-2\\|UCS00-2\\)"), - Qnil, MIN_CHAR_SIP, MAX_CHAR_SIP, 0, 0, - Qnil, CONVERSION_IDENTICAL); + Qnil, MIN_CHAR_SIP, MAX_CHAR_SIP, + MIN_CHAR_SIP, 0, Qnil, CONVERSION_IDENTICAL); staticpro (&Vcharset_ucs_cns); Vcharset_ucs_cns = make_charset (LEADING_BYTE_UCS_CNS, Qucs_cns, 256, 3, @@ -2590,17 +2880,17 @@ complex_vars_of_mule_charset (void) build_string ("UCS for CNS 11643"), build_string ("ISO/IEC 10646 for CNS 11643"), build_string (""), - Qnil, 0, 0, 0, 0, - Qnil, CONVERSION_IDENTICAL); + Qnil, 0, 0, 0, 0, Vcharset_ucs, CONVERSION_IDENTICAL); staticpro (&Vcharset_ucs_jis); Vcharset_ucs_jis = make_charset (LEADING_BYTE_UCS_JIS, Qucs_jis, 256, 3, 2, 2, 0, CHARSET_LEFT_TO_RIGHT, build_string ("UCS for JIS"), build_string ("UCS for JIS X 0208, 0212 and 0213"), - build_string ("ISO/IEC 10646 for JIS X 0208, 0212 and 0213"), + build_string + ("ISO/IEC 10646 for JIS X 0208, 0212 and 0213"), build_string (""), - Qnil, 0, 0, 0, 0, Qnil, CONVERSION_IDENTICAL); + Qnil, 0, 0, 0, 0, Vcharset_ucs, CONVERSION_IDENTICAL); staticpro (&Vcharset_ucs_ks); Vcharset_ucs_ks = make_charset (LEADING_BYTE_UCS_KS, Qucs_ks, 256, 3, @@ -2609,7 +2899,7 @@ complex_vars_of_mule_charset (void) build_string ("UCS for CCS defined by KS"), build_string ("ISO/IEC 10646 for Korean Standards"), build_string (""), - Qnil, 0, 0, 0, 0, Qnil, CONVERSION_IDENTICAL); + Qnil, 0, 0, 0, 0, Vcharset_ucs, CONVERSION_IDENTICAL); staticpro (&Vcharset_ucs_big5); Vcharset_ucs_big5 = make_charset (LEADING_BYTE_UCS_BIG5, Qucs_big5, 256, 3, @@ -2618,7 +2908,7 @@ complex_vars_of_mule_charset (void) build_string ("UCS for Big5"), build_string ("ISO/IEC 10646 for Big5"), build_string (""), - Qnil, 0, 0, 0, 0, Qnil, CONVERSION_IDENTICAL); + Qnil, 0, 0, 0, 0, Vcharset_ucs, CONVERSION_IDENTICAL); #else # define MIN_CHAR_THAI 0 # define MAX_CHAR_THAI 0 @@ -2644,7 +2934,7 @@ complex_vars_of_mule_charset (void) build_string ("Control characters"), build_string ("Control characters 128-191"), build_string (""), - Qnil, 0x80, 0x9F, 0, 0, Qnil, CONVERSION_IDENTICAL); + Qnil, 0x80, 0x9F, 0x80, 0, Qnil, CONVERSION_IDENTICAL); staticpro (&Vcharset_latin_iso8859_1); Vcharset_latin_iso8859_1 = make_charset (LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1, 96, 1, @@ -2653,7 +2943,7 @@ complex_vars_of_mule_charset (void) build_string ("ISO8859-1 (Latin-1)"), build_string ("ISO8859-1 (Latin-1)"), build_string ("iso8859-1"), - Qnil, 0xA0, 0xFF, 0, 32, Qnil, CONVERSION_IDENTICAL); + Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL); staticpro (&Vcharset_latin_iso8859_2); Vcharset_latin_iso8859_2 = make_charset (LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2, 96, 1, @@ -2689,8 +2979,8 @@ complex_vars_of_mule_charset (void) build_string ("TIS620 (Thai)"), build_string ("TIS620.2529 (Thai)"), build_string ("tis620"), - Qnil, MIN_CHAR_THAI, MAX_CHAR_THAI, 0, 32, - Qnil, CONVERSION_IDENTICAL); + Qnil, MIN_CHAR_THAI, MAX_CHAR_THAI, + MIN_CHAR_THAI, 32, Qnil, CONVERSION_96); staticpro (&Vcharset_greek_iso8859_7); Vcharset_greek_iso8859_7 = make_charset (LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7, 96, 1, @@ -2807,8 +3097,8 @@ complex_vars_of_mule_charset (void) build_string ("jisx0208\\.1990"), Qnil, MIN_CHAR_JIS_X0208_1990, - MAX_CHAR_JIS_X0208_1990, 0, 33, - Qnil, CONVERSION_IDENTICAL); + MAX_CHAR_JIS_X0208_1990, MIN_CHAR_JIS_X0208_1990, 33, + Qnil, CONVERSION_94x94); #endif staticpro (&Vcharset_korean_ksc5601); Vcharset_korean_ksc5601 = @@ -2894,11 +3184,11 @@ complex_vars_of_mule_charset (void) build_string ("Big5"), build_string ("Big5"), build_string ("Big5 Chinese traditional"), - build_string ("big5"), + build_string ("big5-0"), Qnil, - 0 /* MIN_CHAR_BIG5_CDP */, - 0 /* MAX_CHAR_BIG5_CDP */, 0, 0, - Qnil, CONVERSION_IDENTICAL); + MIN_CHAR_BIG5_CDP, MAX_CHAR_BIG5_CDP, + MIN_CHAR_BIG5_CDP, 0, Qnil, CONVERSION_IDENTICAL); +#if 0 staticpro (&Vcharset_chinese_big5_cdp); Vcharset_chinese_big5_cdp = make_charset (LEADING_BYTE_CHINESE_BIG5_CDP, Qchinese_big5_cdp, 256, 2, @@ -2907,8 +3197,9 @@ complex_vars_of_mule_charset (void) build_string ("Big5 + CDP extension"), build_string ("Big5 with CDP extension"), build_string ("big5\\.cdp-0"), - Qnil, MIN_CHAR_BIG5_CDP, MAX_CHAR_BIG5_CDP, 0, 0, - Qnil, CONVERSION_IDENTICAL); + Qnil, MIN_CHAR_BIG5_CDP, MAX_CHAR_BIG5_CDP, + MIN_CHAR_BIG5_CDP, 0, Qnil, CONVERSION_IDENTICAL); +#endif #define DEF_HANZIKU(n) \ staticpro (&Vcharset_ideograph_hanziku_##n); \ Vcharset_ideograph_hanziku_##n = \ @@ -2919,8 +3210,8 @@ complex_vars_of_mule_charset (void) build_string ("HANZIKU (pseudo BIG5 encoding) part "#n), \ build_string \ ("hanziku-"#n"$"), \ - Qnil, MIN_CHAR_HANZIKU_##n, MAX_CHAR_HANZIKU_##n, 0, 0, \ - Qnil, CONVERSION_IDENTICAL); + Qnil, MIN_CHAR_HANZIKU_##n, MAX_CHAR_HANZIKU_##n, \ + MIN_CHAR_HANZIKU_##n, 0, Qnil, CONVERSION_IDENTICAL); DEF_HANZIKU (1); DEF_HANZIKU (2); DEF_HANZIKU (3); @@ -2941,8 +3232,8 @@ complex_vars_of_mule_charset (void) build_string ("JEF + CHINA3"), build_string ("JEF + CHINA3 private characters"), build_string ("china3jef-0"), - Qnil, MIN_CHAR_CHINA3_JEF, MAX_CHAR_CHINA3_JEF, 0, 0, - Qnil, CONVERSION_IDENTICAL); + Qnil, MIN_CHAR_CHINA3_JEF, MAX_CHAR_CHINA3_JEF, + MIN_CHAR_CHINA3_JEF, 0, Qnil, CONVERSION_IDENTICAL); staticpro (&Vcharset_ideograph_cbeta); Vcharset_ideograph_cbeta = make_charset (LEADING_BYTE_CBETA, Qideograph_cbeta, 256, 2, @@ -2951,8 +3242,8 @@ complex_vars_of_mule_charset (void) build_string ("CBETA"), build_string ("CBETA private characters"), build_string ("cbeta-0"), - Qnil, MIN_CHAR_CBETA, MAX_CHAR_CBETA, 0, 0, - Qnil, CONVERSION_IDENTICAL); + Qnil, MIN_CHAR_CBETA, MAX_CHAR_CBETA, + MIN_CHAR_CBETA, 0, Qnil, CONVERSION_IDENTICAL); staticpro (&Vcharset_ideograph_gt); Vcharset_ideograph_gt = make_charset (LEADING_BYTE_GT, Qideograph_gt, 256, 3, @@ -2961,8 +3252,8 @@ complex_vars_of_mule_charset (void) build_string ("GT"), build_string ("GT"), build_string (""), - Qnil, MIN_CHAR_GT, MAX_CHAR_GT, 0, 0, - Qnil, CONVERSION_IDENTICAL); + Qnil, MIN_CHAR_GT, MAX_CHAR_GT, + MIN_CHAR_GT, 0, Qnil, CONVERSION_IDENTICAL); #define DEF_GT_PJ(n) \ staticpro (&Vcharset_ideograph_gt_pj_##n); \ Vcharset_ideograph_gt_pj_##n = \ @@ -3005,8 +3296,8 @@ complex_vars_of_mule_charset (void) build_string ("Daikanwa dictionary (second revised version)"), build_string ("Daikanwa\\(\\.[0-9]+\\)?-3"), - Qnil, MIN_CHAR_DAIKANWA, MAX_CHAR_DAIKANWA, 0, 0, - Qnil, CONVERSION_IDENTICAL); + Qnil, MIN_CHAR_DAIKANWA, MAX_CHAR_DAIKANWA, + MIN_CHAR_DAIKANWA, 0, Qnil, CONVERSION_IDENTICAL); staticpro (&Vcharset_ethiopic_ucs); Vcharset_ethiopic_ucs = @@ -3016,7 +3307,7 @@ complex_vars_of_mule_charset (void) build_string ("Ethiopic (UCS)"), build_string ("Ethiopic of UCS"), build_string ("Ethiopic-Unicode"), - Qnil, 0x1200, 0x137F, 0x1200, 0, + Qnil, 0x1200, 0x137F, 0, 0, Qnil, CONVERSION_IDENTICAL); #endif staticpro (&Vcharset_chinese_big5_1); @@ -3028,7 +3319,8 @@ complex_vars_of_mule_charset (void) build_string ("Big5 Level-1 Chinese traditional"), build_string ("big5"), - Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL); + Qnil, 0, 0, 0, 33, /* Qnil, CONVERSION_IDENTICAL */ + Vcharset_chinese_big5, CONVERSION_BIG5_1); staticpro (&Vcharset_chinese_big5_2); Vcharset_chinese_big5_2 = make_charset (LEADING_BYTE_CHINESE_BIG5_2, Qchinese_big5_2, 94, 2, @@ -3038,7 +3330,8 @@ complex_vars_of_mule_charset (void) build_string ("Big5 Level-2 Chinese traditional"), build_string ("big5"), - Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL); + Qnil, 0, 0, 0, 33, /* Qnil, CONVERSION_IDENTICAL */ + Vcharset_chinese_big5, CONVERSION_BIG5_2); #ifdef ENABLE_COMPOSITE_CHARS /* #### For simplicity, we put composite chars into a 96x96 charset.