X-Git-Url: http://git.chise.org/gitweb/?a=blobdiff_plain;f=src%2Fmule-charset.c;h=75002b7883822e076c4f291a1b2451eb99cb5156;hb=7c507c6c79353d3396b1317a06e31e6b0bb91d5f;hp=5a6fc4c5387644961c0724c114585d8906434499;hpb=d921f40aa497cb1cb1f50875fa0429165d3c4083;p=chise%2Fxemacs-chise.git- diff --git a/src/mule-charset.c b/src/mule-charset.c index 5a6fc4c..75002b7 100644 --- a/src/mule-charset.c +++ b/src/mule-charset.c @@ -68,6 +68,7 @@ Lisp_Object Vcharset_ucs; Lisp_Object Vcharset_ucs_bmp; Lisp_Object Vcharset_ucs_smp; Lisp_Object Vcharset_ucs_sip; +Lisp_Object Vcharset_ucs_gb; Lisp_Object Vcharset_ucs_cns; Lisp_Object Vcharset_ucs_jis; Lisp_Object Vcharset_ucs_ks; @@ -76,8 +77,8 @@ Lisp_Object Vcharset_latin_viscii; Lisp_Object Vcharset_latin_tcvn5712; Lisp_Object Vcharset_latin_viscii_lower; Lisp_Object Vcharset_latin_viscii_upper; +Lisp_Object Vcharset_jis_x0208; Lisp_Object Vcharset_chinese_big5; -/* Lisp_Object Vcharset_chinese_big5_cdp; */ Lisp_Object Vcharset_ideograph_hanziku_1; Lisp_Object Vcharset_ideograph_hanziku_2; Lisp_Object Vcharset_ideograph_hanziku_3; @@ -90,7 +91,6 @@ Lisp_Object Vcharset_ideograph_hanziku_9; Lisp_Object Vcharset_ideograph_hanziku_10; Lisp_Object Vcharset_ideograph_hanziku_11; Lisp_Object Vcharset_ideograph_hanziku_12; -Lisp_Object Vcharset_china3_jef; Lisp_Object Vcharset_ideograph_cbeta; Lisp_Object Vcharset_ideograph_gt; Lisp_Object Vcharset_ideograph_gt_pj_1; @@ -339,6 +339,7 @@ Lisp_Object Qascii, Qucs_bmp, Qucs_smp, Qucs_sip, + Qucs_gb, Qucs_cns, Qucs_jis, Qucs_ks, @@ -349,6 +350,7 @@ Lisp_Object Qascii, Qlatin_viscii_upper, Qvietnamese_viscii_lower, Qvietnamese_viscii_upper, + Qjis_x0208, Qchinese_big5, /* Qchinese_big5_cdp, */ Qideograph_hanziku_1, @@ -363,7 +365,6 @@ Lisp_Object Qascii, Qideograph_hanziku_10, Qideograph_hanziku_11, Qideograph_hanziku_12, - Qchina3_jef, Qideograph_cbeta, Qideograph_daikanwa_2, Qideograph_daikanwa, @@ -943,77 +944,68 @@ decode_builtin_char (Lisp_Object charset, int code_point) Lisp_Object mother = XCHARSET_MOTHER (charset); int final; - if ( CHARSETP (mother) && (XCHARSET_MAX_CODE (charset) > 0) ) + if ( XCHARSET_MAX_CODE (charset) > 0 ) { - int code = code_point; - - if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x60 ) - { - int row = code_point >> 8; - int cell = code_point & 255; - - if (row < 16 + 32) - return -1; - else if (row < 16 + 32 + 30) - code = (row - (16 + 32)) * 94 + cell - 33; - else if (row < 18 + 32 + 30) - return -1; - else if (row < 18 + 32 + 60) - code = (row - (18 + 32)) * 94 + cell - 33; - } - else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94x60 ) - { - int plane = code_point >> 16; - int row = (code_point >> 8) & 255; - int cell = code_point & 255; - - if (row < 16 + 32) - return -1; - else if (row < 16 + 32 + 30) - code - = (plane - 33) * 94 * 60 - + (row - (16 + 32)) * 94 - + cell - 33; - else if (row < 18 + 32 + 30) - return -1; - else if (row < 18 + 32 + 60) - code - = (plane - 33) * 94 * 60 - + (row - (18 + 32)) * 94 - + cell - 33; - } - return - decode_builtin_char (mother, code + XCHARSET_CODE_OFFSET(charset)); - } -#if 0 - else if (EQ (charset, Vcharset_chinese_big5)) - { - int c1 = code_point >> 8; - int c2 = code_point & 0xFF; - unsigned int I; - - if ( ( (0xA1 <= c1) && (c1 <= 0xFE) ) - && - ( ((0x40 <= c2) && (c2 <= 0x7E)) || - ((0xA1 <= c2) && (c2 <= 0xFE)) ) ) + if ( CHARSETP (mother) ) { - I = (c1 - 0xA1) * BIG5_SAME_ROW - + c2 - (c2 < 0x7F ? 0x40 : 0x62); + int code = code_point; - if (c1 < 0xC9) + if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x60 ) { - charset = Vcharset_chinese_big5_1; + int row = code_point >> 8; + int cell = code_point & 255; + + if (row < 16 + 32) + return -1; + else if (row < 16 + 32 + 30) + code = (row - (16 + 32)) * 94 + cell - 33; + else if (row < 18 + 32 + 30) + return -1; + else if (row < 18 + 32 + 60) + code = (row - (18 + 32)) * 94 + cell - 33; } - else + else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94x60 ) { - charset = Vcharset_chinese_big5_2; - I -= (BIG5_SAME_ROW) * (0xC9 - 0xA1); + int plane = code_point >> 16; + int row = (code_point >> 8) & 255; + int cell = code_point & 255; + + if (row < 16 + 32) + return -1; + else if (row < 16 + 32 + 30) + code + = (plane - 33) * 94 * 60 + + (row - (16 + 32)) * 94 + + cell - 33; + else if (row < 18 + 32 + 30) + return -1; + else if (row < 18 + 32 + 60) + code + = (plane - 33) * 94 * 60 + + (row - (18 + 32)) * 94 + + cell - 33; } - code_point = ((I / 94 + 33) << 8) | (I % 94 + 33); + return + decode_builtin_char (mother, code + XCHARSET_CODE_OFFSET(charset)); + } + else + { + Emchar cid + = (XCHARSET_DIMENSION (charset) == 1 + ? + code_point - XCHARSET_BYTE_OFFSET (charset) + : + ((code_point >> 8) - XCHARSET_BYTE_OFFSET (charset)) + * XCHARSET_CHARS (charset) + + (code_point & 0xFF) - XCHARSET_BYTE_OFFSET (charset)) + + XCHARSET_CODE_OFFSET (charset); + if ((cid < XCHARSET_MIN_CODE (charset)) + || (XCHARSET_MAX_CODE (charset) < cid)) + return -1; + return cid; } } -#endif - if ((final = XCHARSET_FINAL (charset)) >= '0') + else if ((final = XCHARSET_FINAL (charset)) >= '0') { if (XCHARSET_DIMENSION (charset) == 1) { @@ -1050,28 +1042,12 @@ decode_builtin_char (Lisp_Object charset, int code_point) } } } - else if (XCHARSET_MAX_CODE (charset)) - { - Emchar cid - = (XCHARSET_DIMENSION (charset) == 1 - ? - code_point - XCHARSET_BYTE_OFFSET (charset) - : - ((code_point >> 8) - XCHARSET_BYTE_OFFSET (charset)) - * XCHARSET_CHARS (charset) - + (code_point & 0xFF) - XCHARSET_BYTE_OFFSET (charset)) - + XCHARSET_CODE_OFFSET (charset); - if ((cid < XCHARSET_MIN_CODE (charset)) - || (XCHARSET_MAX_CODE (charset) < cid)) - return -1; - return cid; - } else return -1; } int -charset_code_point (Lisp_Object charset, Emchar ch) +charset_code_point (Lisp_Object charset, Emchar ch, int defined_only) { Lisp_Object encoding_table = XCHARSET_ENCODING_TABLE (charset); Lisp_Object ret; @@ -1085,18 +1061,33 @@ charset_code_point (Lisp_Object charset, Emchar ch) Lisp_Object mother = XCHARSET_MOTHER (charset); int min = XCHARSET_MIN_CODE (charset); int max = XCHARSET_MAX_CODE (charset); - int code; + int code = -1; if ( CHARSETP (mother) ) - code = charset_code_point (mother, ch); - else + { + if (XCHARSET_FINAL (charset) >= '0') + code = charset_code_point (mother, ch, 1); + else + code = charset_code_point (mother, ch, defined_only); + } + else if (defined_only) + return -1; + else if ( ((max == 0) && CHARSETP (mother) + && (XCHARSET_FINAL (charset) == 0)) + || ((min <= ch) && (ch <= max)) ) code = ch; - if ( ((max == 0) && CHARSETP (mother)) || - ((min <= code) && (code <= max)) ) + if ( ((max == 0) && CHARSETP (mother) && (code >= 0)) + || ((min <= code) && (code <= max)) ) { int d = code - XCHARSET_CODE_OFFSET (charset); - if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x60 ) + if ( XCHARSET_CONVERSION (charset) == CONVERSION_IDENTICAL ) + return d; + else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94 ) + return d + 33; + else if ( XCHARSET_CONVERSION (charset) == CONVERSION_96 ) + return d + 32; + else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x60 ) { int row = d / 94; int cell = d % 94 + 33; @@ -1107,6 +1098,10 @@ charset_code_point (Lisp_Object charset, Emchar ch) row += 18 + 32; return (row << 8) | cell; } + else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94 ) + return ((d / 94 + 33) << 8) | (d % 94 + 33); + else if ( XCHARSET_CONVERSION (charset) == CONVERSION_96x96 ) + return ((d / 96 + 32) << 8) | (d % 96 + 32); else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94x60 ) { int plane = d / (94 * 60) + 33; @@ -1119,48 +1114,42 @@ charset_code_point (Lisp_Object charset, Emchar ch) row += 18 + 32; return (plane << 16) | (row << 8) | cell; } - else if (XCHARSET_CHARS (charset) == 94) - { - if (XCHARSET_DIMENSION (charset) == 1) - return d + 33; - else if (XCHARSET_DIMENSION (charset) == 2) - return ((d / 94 + 33) << 8) | (d % 94 + 33); - else if (XCHARSET_DIMENSION (charset) == 3) - return - ( (d / (94 * 94) + 33) << 16) - | ((d / 94 % 94 + 33) << 8) - | (d % 94 + 33); - else /* if (XCHARSET_DIMENSION (charset) == 4) */ - return - ( (d / (94 * 94 * 94) + 33) << 24) - | ((d / (94 * 94) % 94 + 33) << 16) - | ((d / 94 % 94 + 33) << 8) - | (d % 94 + 33); - } - else if (XCHARSET_CHARS (charset) == 96) + else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94x94 ) + return + ( (d / (94 * 94) + 33) << 16) + | ((d / 94 % 94 + 33) << 8) + | (d % 94 + 33); + else if ( XCHARSET_CONVERSION (charset) == CONVERSION_96x96x96 ) + return + ( (d / (96 * 96) + 32) << 16) + | ((d / 96 % 96 + 32) << 8) + | (d % 96 + 32); + else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94x94x94 ) + return + ( (d / (94 * 94 * 94) + 33) << 24) + | ((d / (94 * 94) % 94 + 33) << 16) + | ((d / 94 % 94 + 33) << 8) + | (d % 94 + 33); + else if ( XCHARSET_CONVERSION (charset) == CONVERSION_96x96x96x96 ) + return + ( (d / (96 * 96 * 96) + 32) << 24) + | ((d / (96 * 96) % 96 + 32) << 16) + | ((d / 96 % 96 + 32) << 8) + | (d % 96 + 32); + else { - if (XCHARSET_DIMENSION (charset) == 1) - return d + 32; - else if (XCHARSET_DIMENSION (charset) == 2) - return ((d / 96 + 32) << 8) | (d % 96 + 32); - else if (XCHARSET_DIMENSION (charset) == 3) - return - ( (d / (96 * 96) + 32) << 16) - | ((d / 96 % 96 + 32) << 8) - | (d % 96 + 32); - else /* if (XCHARSET_DIMENSION (charset) == 4) */ - return - ( (d / (96 * 96 * 96) + 32) << 24) - | ((d / (96 * 96) % 96 + 32) << 16) - | ((d / 96 % 96 + 32) << 8) - | (d % 96 + 32); + printf ("Unknown CCS-conversion %d is specified!", + XCHARSET_CONVERSION (charset)); + exit (-1); } - else - return code - XCHARSET_CODE_OFFSET (charset); } - else if ( (XCHARSET_CODE_OFFSET (charset) == 0) || + else if ( ( XCHARSET_FINAL (charset) >= '0' ) && + ( XCHARSET_MIN_CODE (charset) == 0 ) + /* + (XCHARSET_CODE_OFFSET (charset) == 0) || (XCHARSET_CODE_OFFSET (charset) - == XCHARSET_MIN_CODE (charset)) ) + == XCHARSET_MIN_CODE (charset)) + */ ) { int d; @@ -2224,16 +2213,17 @@ N defaults to 0 if omitted. } #ifdef UTF2000 -DEFUN ("encode-char", Fencode_char, 2, 2, 0, /* +DEFUN ("encode-char", Fencode_char, 2, 3, 0, /* Return code-point of CHARACTER in specified CHARSET. */ - (character, charset)) + (character, charset, defined_only)) { int code_point; CHECK_CHAR_COERCE_INT (character); charset = Fget_charset (charset); - code_point = charset_code_point (charset, XCHAR (character)); + code_point = charset_code_point (charset, XCHAR (character), + !NILP (defined_only)); if (code_point >= 0) return make_int (code_point); else @@ -2460,6 +2450,7 @@ syms_of_mule_charset (void) defsymbol (&Qucs_bmp, "ucs-bmp"); defsymbol (&Qucs_smp, "ucs-smp"); defsymbol (&Qucs_sip, "ucs-sip"); + defsymbol (&Qucs_gb, "ucs-gb"); defsymbol (&Qucs_cns, "ucs-cns"); defsymbol (&Qucs_jis, "ucs-jis"); defsymbol (&Qucs_ks, "ucs-ks"); @@ -2470,6 +2461,7 @@ syms_of_mule_charset (void) defsymbol (&Qlatin_viscii_upper, "latin-viscii-upper"); defsymbol (&Qvietnamese_viscii_lower, "vietnamese-viscii-lower"); defsymbol (&Qvietnamese_viscii_upper, "vietnamese-viscii-upper"); + defsymbol (&Qjis_x0208, "=jis-x0208"); defsymbol (&Qideograph_gt, "ideograph-gt"); defsymbol (&Qideograph_gt_pj_1, "ideograph-gt-pj-1"); defsymbol (&Qideograph_gt_pj_2, "ideograph-gt-pj-2"); @@ -2498,7 +2490,6 @@ syms_of_mule_charset (void) defsymbol (&Qideograph_hanziku_10, "ideograph-hanziku-10"); defsymbol (&Qideograph_hanziku_11, "ideograph-hanziku-11"); defsymbol (&Qideograph_hanziku_12, "ideograph-hanziku-12"); - defsymbol (&Qchina3_jef, "china3-jef"); defsymbol (&Qideograph_cbeta, "ideograph-cbeta"); defsymbol (&Qethiopic_ucs, "ethiopic-ucs"); #endif @@ -2610,6 +2601,15 @@ complex_vars_of_mule_charset (void) build_string ("\\(ISO10646.*-2\\|UCS00-2\\)"), Qnil, MIN_CHAR_SIP, MAX_CHAR_SIP, MIN_CHAR_SIP, 0, Qnil, CONVERSION_IDENTICAL); + staticpro (&Vcharset_ucs_gb); + Vcharset_ucs_gb = + make_charset (LEADING_BYTE_UCS_GB, Qucs_gb, 256, 3, + 2, 2, 0, CHARSET_LEFT_TO_RIGHT, + build_string ("UCS for GB"), + build_string ("UCS for GB"), + build_string ("ISO/IEC 10646 for GB"), + build_string (""), + Qnil, 0, 0, 0, 0, Vcharset_ucs, CONVERSION_IDENTICAL); staticpro (&Vcharset_ucs_cns); Vcharset_ucs_cns = make_charset (LEADING_BYTE_UCS_CNS, Qucs_cns, 256, 3, @@ -2717,8 +2717,7 @@ complex_vars_of_mule_charset (void) build_string ("TIS620 (Thai)"), build_string ("TIS620.2529 (Thai)"), build_string ("tis620"), - Qnil, MIN_CHAR_THAI, MAX_CHAR_THAI, - MIN_CHAR_THAI, 32, Qnil, CONVERSION_IDENTICAL); + Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL); staticpro (&Vcharset_greek_iso8859_7); Vcharset_greek_iso8859_7 = make_charset (LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7, 96, 1, @@ -2785,6 +2784,21 @@ complex_vars_of_mule_charset (void) build_string ("ISO8859-9 (Latin-5)"), build_string ("iso8859-9"), Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL); +#ifdef UTF2000 + staticpro (&Vcharset_jis_x0208); + Vcharset_jis_x0208 = + make_charset (LEADING_BYTE_JIS_X0208, + Qjis_x0208, 94, 2, + 2, 0, 'B', CHARSET_LEFT_TO_RIGHT, + build_string ("JIS X0208"), + build_string ("JIS X0208 Common"), + build_string ("JIS X0208 Common part"), + build_string ("jisx0208\\.1990"), + Qnil, + MIN_CHAR_JIS_X0208_1990, + MAX_CHAR_JIS_X0208_1990, MIN_CHAR_JIS_X0208_1990, 33, + Qnil, CONVERSION_94x94); +#endif staticpro (&Vcharset_japanese_jisx0208_1978); Vcharset_japanese_jisx0208_1978 = make_charset (LEADING_BYTE_JAPANESE_JISX0208_1978, @@ -2795,7 +2809,13 @@ complex_vars_of_mule_charset (void) build_string ("JIS X0208:1978 Japanese Kanji (so called \"old JIS\")"), build_string ("\\(jisx0208\\|jisc6226\\)\\.1978"), - Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL); + Qnil, 0, 0, 0, 33, +#ifdef UTF2000 + Vcharset_jis_x0208, +#else + Qnil, +#endif + CONVERSION_IDENTICAL); staticpro (&Vcharset_chinese_gb2312); Vcharset_chinese_gb2312 = make_charset (LEADING_BYTE_CHINESE_GB2312, Qchinese_gb2312, 94, 2, @@ -2822,7 +2842,13 @@ complex_vars_of_mule_charset (void) build_string ("JIS X0208:1983 (Japanese)"), build_string ("JIS X0208:1983 Japanese Kanji"), build_string ("jisx0208\\.1983"), - Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL); + Qnil, 0, 0, 0, 33, +#ifdef UTF2000 + Vcharset_jis_x0208, +#else + Qnil, +#endif + CONVERSION_IDENTICAL); #ifdef UTF2000 staticpro (&Vcharset_japanese_jisx0208_1990); Vcharset_japanese_jisx0208_1990 = @@ -2834,9 +2860,11 @@ complex_vars_of_mule_charset (void) build_string ("JIS X0208:1990 Japanese Kanji"), build_string ("jisx0208\\.1990"), Qnil, - MIN_CHAR_JIS_X0208_1990, - MAX_CHAR_JIS_X0208_1990, MIN_CHAR_JIS_X0208_1990, 33, - Qnil, CONVERSION_IDENTICAL); + 0x2121 /* MIN_CHAR_JIS_X0208_1990 */, + 0x7426 /* MAX_CHAR_JIS_X0208_1990 */, + 0 /* MIN_CHAR_JIS_X0208_1990 */, 33, + Vcharset_jis_x0208 /* Qnil */, + CONVERSION_IDENTICAL /* CONVERSION_94x94 */); #endif staticpro (&Vcharset_korean_ksc5601); Vcharset_korean_ksc5601 = @@ -2962,16 +2990,6 @@ complex_vars_of_mule_charset (void) DEF_HANZIKU (10); DEF_HANZIKU (11); DEF_HANZIKU (12); - staticpro (&Vcharset_china3_jef); - Vcharset_china3_jef = - make_charset (LEADING_BYTE_CHINA3_JEF, Qchina3_jef, 256, 2, - 2, 2, 0, CHARSET_LEFT_TO_RIGHT, - build_string ("JC3"), - build_string ("JEF + CHINA3"), - build_string ("JEF + CHINA3 private characters"), - build_string ("china3jef-0"), - Qnil, MIN_CHAR_CHINA3_JEF, MAX_CHAR_CHINA3_JEF, - MIN_CHAR_CHINA3_JEF, 0, Qnil, CONVERSION_IDENTICAL); staticpro (&Vcharset_ideograph_cbeta); Vcharset_ideograph_cbeta = make_charset (LEADING_BYTE_CBETA, Qideograph_cbeta, 256, 2,