X-Git-Url: http://git.chise.org/gitweb/?a=blobdiff_plain;f=src%2Fmule-charset.c;h=3888b4a96850aa2400318e31d7e0410a4aea9865;hb=cec611d849c61918d50355d4970b5dd57f01eb4f;hp=dc1952c68976ddcdcc3aba85ad87161e27fda65d;hpb=fd1f8d7c5adf195d54ab65a48bdcf0df9ac3c0e0;p=chise%2Fxemacs-chise.git diff --git a/src/mule-charset.c b/src/mule-charset.c index dc1952c..3888b4a 100644 --- a/src/mule-charset.c +++ b/src/mule-charset.c @@ -1,7 +1,7 @@ /* Functions to handle multilingual characters. Copyright (C) 1992, 1995 Free Software Foundation, Inc. Copyright (C) 1995 Sun Microsystems, Inc. - Copyright (C) 1999,2000,2001 MORIOKA Tomohiko + Copyright (C) 1999,2000,2001,2002 MORIOKA Tomohiko This file is part of XEmacs. @@ -77,7 +77,7 @@ Lisp_Object Vcharset_latin_tcvn5712; Lisp_Object Vcharset_latin_viscii_lower; Lisp_Object Vcharset_latin_viscii_upper; Lisp_Object Vcharset_chinese_big5; -Lisp_Object Vcharset_chinese_big5_cdp; +/* Lisp_Object Vcharset_chinese_big5_cdp; */ Lisp_Object Vcharset_ideograph_hanziku_1; Lisp_Object Vcharset_ideograph_hanziku_2; Lisp_Object Vcharset_ideograph_hanziku_3; @@ -386,7 +386,7 @@ Lisp_Object Qleading_byte; Lisp_Object Qshort_name, Qlong_name; #ifdef UTF2000 Lisp_Object Qmin_code, Qmax_code, Qcode_offset; -Lisp_Object Qmother, Qconversion, Q94x60; +Lisp_Object Qmother, Qconversion, Q94x60, Q94x94x60; #endif Lisp_Object Qascii, @@ -428,7 +428,7 @@ Lisp_Object Qascii, Qvietnamese_viscii_lower, Qvietnamese_viscii_upper, Qchinese_big5, - Qchinese_big5_cdp, + /* Qchinese_big5_cdp, */ Qideograph_hanziku_1, Qideograph_hanziku_2, Qideograph_hanziku_3, @@ -986,9 +986,53 @@ get_unallocated_leading_byte (int dimension) Emchar decode_builtin_char (Lisp_Object charset, int code_point) { + Lisp_Object mother = XCHARSET_MOTHER (charset); int final; - if (EQ (charset, Vcharset_chinese_big5)) + if ( CHARSETP (mother) ) + { + int code = code_point; + + if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x60 ) + { + int row = code_point >> 8; + int cell = code_point & 255; + + if (row < 16 + 32) + return -1; + else if (row < 16 + 32 + 30) + code = (row - (16 + 32)) * 94 + cell - 33; + else if (row < 18 + 32 + 30) + return -1; + else if (row < 18 + 32 + 60) + code = (row - (18 + 32)) * 94 + cell - 33; + } + else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94x60 ) + { + int plane = code_point >> 16; + int row = (code_point >> 8) & 255; + int cell = code_point & 255; + + if (row < 16 + 32) + return -1; + else if (row < 16 + 32 + 30) + code + = (plane - 33) * 94 * 60 + + (row - (16 + 32)) * 94 + + cell - 33; + else if (row < 18 + 32 + 30) + return -1; + else if (row < 18 + 32 + 60) + code + = (plane - 33) * 94 * 60 + + (row - (18 + 32)) * 94 + + cell - 33; + } + return + decode_builtin_char (mother, code + XCHARSET_CODE_OFFSET(charset)); + } +#if 0 + else if (EQ (charset, Vcharset_chinese_big5)) { int c1 = code_point >> 8; int c2 = code_point & 0xFF; @@ -1014,6 +1058,7 @@ decode_builtin_char (Lisp_Object charset, int code_point) code_point = ((I / 94 + 33) << 8) | (I % 94 + 33); } } +#endif if ((final = XCHARSET_FINAL (charset)) >= '0') { if (XCHARSET_DIMENSION (charset) == 1) @@ -1084,116 +1129,130 @@ charset_code_point (Lisp_Object charset, Emchar ch) else { Lisp_Object mother = XCHARSET_MOTHER (charset); + int min = XCHARSET_MIN_CODE (charset); + int max = XCHARSET_MAX_CODE (charset); + int code; if ( CHARSETP (mother) ) + code = charset_code_point (mother, ch); + else + code = ch; + if ( (min <= code) && (code <= max) ) { - int min = XCHARSET_MIN_CODE (charset); - int max = XCHARSET_MAX_CODE (charset); - int code = charset_code_point (mother, ch); + int d = code - XCHARSET_CODE_OFFSET (charset); - if ( (min <= code) && (code <= max) ) + if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x60 ) { - if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x60 ) - { - int m = code - min; - int row = m / 94; - int cell = m % 94 + 33; - - if (row < 30) - row += 16 + 32; - else - row += 18 + 32; - return (row << 8) | cell; - } + int row = d / 94; + int cell = d % 94 + 33; + + if (row < 30) + row += 16 + 32; else - return code - XCHARSET_CODE_OFFSET (charset); + row += 18 + 32; + return (row << 8) | cell; } - } - } - return range_charset_code_point (charset, ch); -} - -int -range_charset_code_point (Lisp_Object charset, Emchar ch) -{ - int d; + else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94x60 ) + { + int plane = d / (94 * 60) + 33; + int row = (d % (94 * 60)) / 94; + int cell = d % 94 + 33; - if ((XCHARSET_MIN_CODE (charset) <= ch) - && (ch <= XCHARSET_MAX_CODE (charset))) - { - d = ch - XCHARSET_CODE_OFFSET (charset); - - if (XCHARSET_CHARS (charset) == 256) - return d; - else if (XCHARSET_DIMENSION (charset) == 1) - return d + XCHARSET_BYTE_OFFSET (charset); - else if (XCHARSET_DIMENSION (charset) == 2) - return - ((d / XCHARSET_CHARS (charset) - + XCHARSET_BYTE_OFFSET (charset)) << 8) - | (d % XCHARSET_CHARS (charset) + XCHARSET_BYTE_OFFSET (charset)); - else if (XCHARSET_DIMENSION (charset) == 3) - return - ((d / (XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset)) - + XCHARSET_BYTE_OFFSET (charset)) << 16) - | ((d / XCHARSET_CHARS (charset) - % XCHARSET_CHARS (charset) - + XCHARSET_BYTE_OFFSET (charset)) << 8) - | (d % XCHARSET_CHARS (charset) + XCHARSET_BYTE_OFFSET (charset)); - else /* if (XCHARSET_DIMENSION (charset) == 4) */ - return - ((d / (XCHARSET_CHARS (charset) - * XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset)) - + XCHARSET_BYTE_OFFSET (charset)) << 24) - | ((d / (XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset)) - % XCHARSET_CHARS (charset) - + XCHARSET_BYTE_OFFSET (charset)) << 16) - | ((d / XCHARSET_CHARS (charset) % XCHARSET_CHARS (charset) - + XCHARSET_BYTE_OFFSET (charset)) << 8) - | (d % XCHARSET_CHARS (charset) + XCHARSET_BYTE_OFFSET (charset)); - } - else if ( (XCHARSET_CODE_OFFSET (charset) == 0) || - (XCHARSET_CODE_OFFSET (charset) == XCHARSET_MIN_CODE (charset)) ) - { - if (XCHARSET_DIMENSION (charset) == 1) - { - if (XCHARSET_CHARS (charset) == 94) + if (row < 30) + row += 16 + 32; + else + row += 18 + 32; + return (plane << 16) | (row << 8) | cell; + } + else if (XCHARSET_CHARS (charset) == 94) { - if (((d = ch - (MIN_CHAR_94 - + (XCHARSET_FINAL (charset) - '0') * 94)) >= 0) - && (d < 94)) + if (XCHARSET_DIMENSION (charset) == 1) return d + 33; + else if (XCHARSET_DIMENSION (charset) == 2) + return ((d / 94 + 33) << 8) | (d % 94 + 33); + else if (XCHARSET_DIMENSION (charset) == 3) + return + ( (d / (94 * 94) + 33) << 16) + | ((d / 94 % 94 + 33) << 8) + | (d % 94 + 33); + else /* if (XCHARSET_DIMENSION (charset) == 4) */ + return + ( (d / (94 * 94 * 94) + 33) << 24) + | ((d / (94 * 94) % 94 + 33) << 16) + | ((d / 94 % 94 + 33) << 8) + | (d % 94 + 33); } else if (XCHARSET_CHARS (charset) == 96) { - if (((d = ch - (MIN_CHAR_96 - + (XCHARSET_FINAL (charset) - '0') * 96)) >= 0) - && (d < 96)) + if (XCHARSET_DIMENSION (charset) == 1) return d + 32; + else if (XCHARSET_DIMENSION (charset) == 2) + return ((d / 96 + 32) << 8) | (d % 96 + 32); + else if (XCHARSET_DIMENSION (charset) == 3) + return + ( (d / (96 * 96) + 32) << 16) + | ((d / 96 % 96 + 32) << 8) + | (d % 96 + 32); + else /* if (XCHARSET_DIMENSION (charset) == 4) */ + return + ( (d / (96 * 96 * 96) + 32) << 24) + | ((d / (96 * 96) % 96 + 32) << 16) + | ((d / 96 % 96 + 32) << 8) + | (d % 96 + 32); } else - return -1; + return code - XCHARSET_CODE_OFFSET (charset); } - else if (XCHARSET_DIMENSION (charset) == 2) + else if ( (XCHARSET_CODE_OFFSET (charset) == 0) || + (XCHARSET_CODE_OFFSET (charset) + == XCHARSET_MIN_CODE (charset)) ) { - if (XCHARSET_CHARS (charset) == 94) + int d; + + if (XCHARSET_DIMENSION (charset) == 1) { - if (((d = ch - (MIN_CHAR_94x94 - + (XCHARSET_FINAL (charset) - '0') * 94 * 94)) - >= 0) - && (d < 94 * 94)) - return (((d / 94) + 33) << 8) | (d % 94 + 33); + if (XCHARSET_CHARS (charset) == 94) + { + if (((d = ch - (MIN_CHAR_94 + + (XCHARSET_FINAL (charset) - '0') * 94)) + >= 0) + && (d < 94)) + return d + 33; + } + else if (XCHARSET_CHARS (charset) == 96) + { + if (((d = ch - (MIN_CHAR_96 + + (XCHARSET_FINAL (charset) - '0') * 96)) + >= 0) + && (d < 96)) + return d + 32; + } + else + return -1; } - else if (XCHARSET_CHARS (charset) == 96) + else if (XCHARSET_DIMENSION (charset) == 2) { - if (((d = ch - (MIN_CHAR_96x96 - + (XCHARSET_FINAL (charset) - '0') * 96 * 96)) - >= 0) - && (d < 96 * 96)) - return (((d / 96) + 32) << 8) | (d % 96 + 32); + if (XCHARSET_CHARS (charset) == 94) + { + if (((d = ch - (MIN_CHAR_94x94 + + + (XCHARSET_FINAL (charset) - '0') * 94 * 94)) + >= 0) + && (d < 94 * 94)) + return (((d / 94) + 33) << 8) | (d % 94 + 33); + } + else if (XCHARSET_CHARS (charset) == 96) + { + if (((d = ch - (MIN_CHAR_96x96 + + + (XCHARSET_FINAL (charset) - '0') * 96 * 96)) + >= 0) + && (d < 96 * 96)) + return (((d / 96) + 32) << 8) | (d % 96 + 32); + } + else + return -1; } - else - return -1; } } return -1; @@ -1437,6 +1496,7 @@ character set. Recognized properties are: this character set. 'dimension Number of octets used to index a character in this charset. Either 1 or 2. Defaults to 1. + If UTF-2000 feature is enabled, 3 or 4 are also available. 'columns Number of columns used to display a character in this charset. Only used in TTY mode. (Under X, the actual width of a character can be derived from the font used to display the @@ -1445,6 +1505,7 @@ character set. Recognized properties are: 'chars Number of characters in each dimension (94 or 96). Defaults to 94. Note that if the dimension is 2, the character set thus described is 94x94 or 96x96. + If UTF-2000 feature is enabled, 128 or 256 are also available. 'final Final byte of ISO 2022 escape sequence. Must be supplied. Each combination of (DIMENSION, CHARS) defines a separate namespace for final bytes. Note that ISO @@ -1469,6 +1530,13 @@ character set. Recognized properties are: is passed the octets of the character, with the high bit cleared and set depending upon whether the value of the 'graphic property is 0 or 1. +'mother [UTF-2000 only] Base coded-charset. +'code-min [UTF-2000 only] Minimum code-point of a base coded-charset. +'code-max [UTF-2000 only] Maximum code-point of a base coded-charset. +'code-offset [UTF-2000 only] Offset for a code-point of a base + coded-charset. +'conversion [UTF-2000 only] Conversion for a code-point of a base + coded-charset (94x60 or 94x94x60). */ (name, doc_string, props)) { @@ -1606,6 +1674,10 @@ character set. Recognized properties are: { if (EQ (value, Q94x60)) conversion = CONVERSION_94x60; + else if (EQ (value, Q94x94x60)) + conversion = CONVERSION_94x94x60; + else + signal_simple_error ("Unrecognized conversion", value); } #endif @@ -2397,6 +2469,7 @@ syms_of_mule_charset (void) defsymbol (&Qcode_offset, "code-offset"); defsymbol (&Qconversion, "conversion"); defsymbol (&Q94x60, "94x60"); + defsymbol (&Q94x94x60, "94x94x60"); #endif defsymbol (&Ql2r, "l2r"); @@ -2457,7 +2530,7 @@ syms_of_mule_charset (void) defsymbol (&Qideograph_daikanwa_2, "ideograph-daikanwa-2"); defsymbol (&Qideograph_daikanwa, "ideograph-daikanwa"); defsymbol (&Qchinese_big5, "chinese-big5"); - defsymbol (&Qchinese_big5_cdp, "chinese-big5-cdp"); + /* defsymbol (&Qchinese_big5_cdp, "chinese-big5-cdp"); */ defsymbol (&Qideograph_hanziku_1, "ideograph-hanziku-1"); defsymbol (&Qideograph_hanziku_2, "ideograph-hanziku-2"); defsymbol (&Qideograph_hanziku_3, "ideograph-hanziku-3"); @@ -2894,11 +2967,11 @@ complex_vars_of_mule_charset (void) build_string ("Big5"), build_string ("Big5"), build_string ("Big5 Chinese traditional"), - build_string ("big5"), + build_string ("big5-0"), Qnil, - 0 /* MIN_CHAR_BIG5_CDP */, - 0 /* MAX_CHAR_BIG5_CDP */, 0, 0, - Qnil, CONVERSION_IDENTICAL); + MIN_CHAR_BIG5_CDP, MAX_CHAR_BIG5_CDP, + MIN_CHAR_BIG5_CDP, 0, Qnil, CONVERSION_IDENTICAL); +#if 0 staticpro (&Vcharset_chinese_big5_cdp); Vcharset_chinese_big5_cdp = make_charset (LEADING_BYTE_CHINESE_BIG5_CDP, Qchinese_big5_cdp, 256, 2, @@ -2909,6 +2982,7 @@ complex_vars_of_mule_charset (void) build_string ("big5\\.cdp-0"), Qnil, MIN_CHAR_BIG5_CDP, MAX_CHAR_BIG5_CDP, MIN_CHAR_BIG5_CDP, 0, Qnil, CONVERSION_IDENTICAL); +#endif #define DEF_HANZIKU(n) \ staticpro (&Vcharset_ideograph_hanziku_##n); \ Vcharset_ideograph_hanziku_##n = \