From 843c8a78336244f93d1bd86c2cb765623e9dfe20 Mon Sep 17 00:00:00 2001 From: tomo Date: Sun, 15 Dec 2002 07:17:18 +0000 Subject: [PATCH] Sync with r21-2-19-utf-2000-0_4-3. --- lisp/ChangeLog | 9 +++++ lisp/mule/chinese.el | 41 ++++++++++----------- src/ChangeLog | 49 +++++++++++++++++++++++++ src/character.h | 97 ++++++++++++++++++++++++++++++++++++++++---------- src/file-coding.c | 6 ++++ src/mule-charset.c | 89 +++++++++++++++++++++++++++++++++++++++++---- 6 files changed, 246 insertions(+), 45 deletions(-) diff --git a/lisp/ChangeLog b/lisp/ChangeLog index df15071..b776c26 100644 --- a/lisp/ChangeLog +++ b/lisp/ChangeLog @@ -1,3 +1,12 @@ +1999-08-28 MORIOKA Tomohiko + + * mule/chinese.el (chinese-cns11643-3): Don't define if `utf-2000' + is provided because it is defined as builtin charset. + (chinese-cns11643-4): Likewise. + (chinese-cns11643-5): Likewise. + (chinese-cns11643-6): Likewise. + (chinese-cns11643-7): Likewise. + 1999-06-17 MORIOKA Tomohiko * bytecomp.el (byte-compile-insert-header): Use utf-8 as diff --git a/lisp/mule/chinese.el b/lisp/mule/chinese.el index 7ab691e..accb940 100644 --- a/lisp/mule/chinese.el +++ b/lisp/mule/chinese.el @@ -50,26 +50,27 @@ ;; that appear once in some ancient manuscript and whose meaning ;; is unknown. -(flet - ((make-chinese-cns11643-charset - (name plane final) - (make-charset - name (concat "CNS 11643 Plane " plane " (Chinese traditional)") - `(registry - ,(concat "CNS11643[.-]\\(.*[.-]\\)?" plane "$") - dimension 2 - chars 94 - final ,final - graphic 0)) - (modify-syntax-entry name "w") - (modify-category-entry name ?t) - )) - (make-chinese-cns11643-charset 'chinese-cns11643-3 "3" ?I) - (make-chinese-cns11643-charset 'chinese-cns11643-4 "4" ?J) - (make-chinese-cns11643-charset 'chinese-cns11643-5 "5" ?K) - (make-chinese-cns11643-charset 'chinese-cns11643-6 "6" ?L) - (make-chinese-cns11643-charset 'chinese-cns11643-7 "7" ?M) - ) +(unless (featurep 'utf-2000) + (flet + ((make-chinese-cns11643-charset + (name plane final) + (make-charset + name (concat "CNS 11643 Plane " plane " (Chinese traditional)") + `(registry + ,(concat "CNS11643[.-]\\(.*[.-]\\)?" plane "$") + dimension 2 + chars 94 + final ,final + graphic 0)) + (modify-syntax-entry name "w") + (modify-category-entry name ?t) + )) + (make-chinese-cns11643-charset 'chinese-cns11643-3 "3" ?I) + (make-chinese-cns11643-charset 'chinese-cns11643-4 "4" ?J) + (make-chinese-cns11643-charset 'chinese-cns11643-5 "5" ?K) + (make-chinese-cns11643-charset 'chinese-cns11643-6 "6" ?L) + (make-chinese-cns11643-charset 'chinese-cns11643-7 "7" ?M) + )) ;; ISO-IR-165 (CCITT Extended GB) ;; It is based on CCITT Recommendation T.101, includes GB 2312-80 + diff --git a/src/ChangeLog b/src/ChangeLog index e4da5d0..4516844 100644 --- a/src/ChangeLog +++ b/src/ChangeLog @@ -1,3 +1,52 @@ +1999-08-30 MORIOKA Tomohiko + + * character.h (LEADING_BYTE_COMPOSITE): Deleted. + +1999-08-29 MORIOKA Tomohiko + + * character.h (REP_BYTES_BY_FIRST_BYTE): Change order of + condition. + +1999-08-28 MORIOKA Tomohiko + + * character.h (LEADING_BYTE_PRIVATE_P): Don't define in UTF2000. + (CHARSET_PRIVATE_P): Likewise. + (XCHARSET_PRIVATE_P): Likewise. + (MAKE_CHAR): Don't use XCHARSET_PRIVATE_P in UTF2000. + + * file-coding.c (encode_coding_ucs4): Don't implement in UTF2000. + (decode_coding_iso2022): Don't use XCHARSET_PRIVATE_P in UTF2000. + +1999-08-28 MORIOKA Tomohiko + + * character.h (LEADING_BYTE_*): Changed in UTF2000. + (NUM_LEADING_BYTES): Changed from 128 to 256. + (FIELD1_TO_PRIVATE_LEADING_BYTE): Change value to 0x80 in UTF2000. + (FIELD1_TO_OFFICIAL_LEADING_BYTE): Change value to 0x80 in + UTF2000. + (FIELD2_TO_PRIVATE_LEADING_BYTE): Change value to 0x80 in UTF2000. + + * mule-charset.c (Vcharset_chinese_cns11643_3): New variable in + UTF2000. + (Vcharset_chinese_cns11643_4): New variable in UTF2000. + (Vcharset_chinese_cns11643_5): New variable in UTF2000. + (Vcharset_chinese_cns11643_6): New variable in UTF2000. + (Vcharset_chinese_cns11643_7): New variable in UTF2000. + (Qchinese_cns11643_3): New variable in UTF2000. + (Qchinese_cns11643_4): New variable in UTF2000. + (Qchinese_cns11643_5): New variable in UTF2000. + (Qchinese_cns11643_6): New variable in UTF2000. + (Qchinese_cns11643_7): New variable in UTF2000. + (syms_of_mule_charset): Define `chinese-cns11643-3', + `chinese-cns11643-4', `chinese-cns11643-5', `chinese-cns11643-6' + and `chinese-cns11643-7' in UTF2000. + (vars_of_mule_charset): Initialize + next_allocated_2_byte_leading_byte by LEADING_BYTE_CHINESE_BIG5_2 + + 1 in UTF2000. + (complex_vars_of_mule_charset): Setup charset + `chinese-cns11643-3', `chinese-cns11643-4', `chinese-cns11643-5', + `chinese-cns11643-6' and `chinese-cns11643-7' in UTF2000. + 1999-08-27 MORIOKA Tomohiko * mule-charset.c: Move setting for `leading-code-private-11' from diff --git a/src/character.h b/src/character.h index f35b450..ce3905e 100644 --- a/src/character.h +++ b/src/character.h @@ -35,9 +35,6 @@ Boston, MA 02111-1307, USA. */ #define MIN_LEADING_BYTE 0x80 /* These need special treatment in a string and/or character */ #define LEADING_BYTE_ASCII 0x8E /* Omitted in a buffer */ -#ifdef ENABLE_COMPOSITE_CHARS -#endif -#define LEADING_BYTE_COMPOSITE 0x80 /* for a composite character */ #define LEADING_BYTE_CONTROL_1 0x8F /* represent normal 80-9F */ /* Note the gap in each official charset can cause core dump @@ -65,6 +62,43 @@ enum LEADING_BYTE_OFFICIAL_1 #define MIN_LEADING_BYTE_OFFICIAL_1 LEADING_BYTE_LATIN_ISO8859_1 #define MAX_LEADING_BYTE_OFFICIAL_1 LEADING_BYTE_LATIN_ISO8859_9 +#ifdef UTF2000 + +#define LEADING_BYTE_CHINESE_BIG5_1 0xB0 /* Big5 Level 1 */ +#define LEADING_BYTE_CHINESE_BIG5_2 0xB1 /* Big5 Level 2 */ +#define MIN_LEADING_BYTE_PRIVATE_2 0xB0 +#define MAX_LEADING_BYTE_PRIVATE_2 0xBF + +/** The following are for 2-byte characters in an official charset. **/ + +#define LEADING_BYTE_JAPANESE_JISX0208_1978 0xC0/* Japanese JIS X0208-1978 */ +#define LEADING_BYTE_CHINESE_GB2312 0xC1 /* Chinese Hanzi GB2312-1980 */ +#define LEADING_BYTE_JAPANESE_JISX0208 0xC2 /* Japanese JIS X0208-1983 */ +#define LEADING_BYTE_KOREAN_KSC5601 0xC3 /* Hangul KS C5601-1987 */ +#define LEADING_BYTE_JAPANESE_JISX0212 0xC4 /* Japanese JIS X0212-1990 */ +#define LEADING_BYTE_CHINESE_CCITT_GB 0xC5 /* CCITT Extended GB */ +#define LEADING_BYTE_CHINESE_CNS11643_1 0xC7 /* Chinese CNS11643 Set 1 */ +#define LEADING_BYTE_CHINESE_CNS11643_2 0xC8 /* Chinese CNS11643 Set 2 */ +#define LEADING_BYTE_CHINESE_CNS11643_3 0xC9 /* Chinese CNS11643 Set 3 */ +#define LEADING_BYTE_CHINESE_CNS11643_4 0xCA /* Chinese CNS11643 Set 4 */ +#define LEADING_BYTE_CHINESE_CNS11643_5 0xCB /* Chinese CNS11643 Set 5 */ +#define LEADING_BYTE_CHINESE_CNS11643_6 0xCC /* Chinese CNS11643 Set 6 */ +#define LEADING_BYTE_CHINESE_CNS11643_7 0xCD /* Chinese CNS11643 Set 7 */ +#define LEADING_BYTE_KOREAN_KPS9566 0xCE /* DPRK Hangul KPS 9566-1997 */ + +#define MIN_LEADING_BYTE_OFFICIAL_2 LEADING_BYTE_JAPANESE_JISX0208_1978 +#define MAX_LEADING_BYTE_OFFICIAL_2 LEADING_BYTE_KOREAN_KPS9566 + +/** The following are for 1- and 2-byte characters in a private charset. **/ + +#define PRE_LEADING_BYTE_PRIVATE_1 0x120 /* 1-byte char-set */ +#define PRE_LEADING_BYTE_PRIVATE_2 0x121 /* 2-byte char-set */ + +#define MIN_LEADING_BYTE_PRIVATE_1 0x0D0 +#define MAX_LEADING_BYTE_PRIVATE_1 0x11f + +#else + /** The following are for 2-byte characters in an official charset. **/ enum LEADING_BYTE_OFFICIAL_2 { @@ -103,7 +137,9 @@ enum LEADING_BYTE_OFFICIAL_2 #define MIN_LEADING_BYTE_PRIVATE_2 0xF0 #define MAX_LEADING_BYTE_PRIVATE_2 0xFF -#define NUM_LEADING_BYTES 128 +#endif + +#define NUM_LEADING_BYTES 256 /************************************************************************/ @@ -112,7 +148,9 @@ enum LEADING_BYTE_OFFICIAL_2 /* Is this leading byte for a private charset? */ +#ifndef UTF2000 #define LEADING_BYTE_PRIVATE_P(lb) ((lb) >= MIN_LEADING_BYTE_PRIVATE_1) +#endif /* Is this a prefix for a private leading byte? */ @@ -252,7 +290,9 @@ DECLARE_LRECORD (charset, Lisp_Charset); #define CHARSET_REVERSE_DIRECTION_CHARSET(cs) ((cs)->reverse_direction_charset) +#ifdef LEADING_BYTE_PRIVATE_P #define CHARSET_PRIVATE_P(cs) LEADING_BYTE_PRIVATE_P (CHARSET_LEADING_BYTE (cs)) +#endif #define XCHARSET_ID(cs) CHARSET_ID (XCHARSET (cs)) #define XCHARSET_NAME(cs) CHARSET_NAME (XCHARSET (cs)) @@ -270,7 +310,9 @@ DECLARE_LRECORD (charset, Lisp_Charset); #define XCHARSET_CCL_PROGRAM(cs) CHARSET_CCL_PROGRAM (XCHARSET (cs)) #define XCHARSET_DIMENSION(cs) CHARSET_DIMENSION (XCHARSET (cs)) #define XCHARSET_CHARS(cs) CHARSET_CHARS (XCHARSET (cs)) +#ifdef CHARSET_PRIVATE_P #define XCHARSET_PRIVATE_P(cs) CHARSET_PRIVATE_P (XCHARSET (cs)) +#endif #define XCHARSET_REVERSE_DIRECTION_CHARSET(cs) \ CHARSET_REVERSE_DIRECTION_CHARSET (XCHARSET (cs)) @@ -284,9 +326,9 @@ struct charset_lookup { Bufbyte next_allocated_2_byte_leading_byte; }; -INLINE_HEADER Lisp_Object CHARSET_BY_LEADING_BYTE (Bufbyte lb); +INLINE_HEADER Lisp_Object CHARSET_BY_LEADING_BYTE (int lb); INLINE_HEADER Lisp_Object -CHARSET_BY_LEADING_BYTE (Bufbyte lb) +CHARSET_BY_LEADING_BYTE (int lb) { extern struct charset_lookup *chlook; @@ -295,9 +337,9 @@ CHARSET_BY_LEADING_BYTE (Bufbyte lb) following unless we introduce `tem'. */ int tem = lb; type_checking_assert (tem >= MIN_LEADING_BYTE && - tem <= (MIN_LEADING_BYTE + NUM_LEADING_BYTES)); + tem < (MIN_LEADING_BYTE + NUM_LEADING_BYTES)); #endif - return chlook->charset_by_leading_byte[lb - 128]; + return chlook->charset_by_leading_byte[lb - MIN_LEADING_BYTE]; } INLINE_HEADER Lisp_Object @@ -327,18 +369,18 @@ INLINE_HEADER int REP_BYTES_BY_FIRST_BYTE (int fb); INLINE_HEADER int REP_BYTES_BY_FIRST_BYTE (int fb) { - if ( fb >= 0xfc ) - return 6; - else if ( fb >= 0xf8 ) - return 5; - else if ( fb >= 0xf0 ) - return 4; - else if ( fb >= 0xe0 ) - return 3; - else if ( fb >= 0xc0 ) + if ( fb < 0xc0 ) + return 1; + else if ( fb < 0xe0 ) return 2; + else if ( fb < 0xf0 ) + return 3; + else if ( fb < 0xf8 ) + return 4; + else if ( fb < 0xfc ) + return 5; else - return 1; + return 6; } #else /* MULE */ INLINE_HEADER int REP_BYTES_BY_FIRST_BYTE (int fb); @@ -396,10 +438,20 @@ REP_BYTES_BY_FIRST_BYTE (int fb) /* Converting between field values and leading bytes. */ #define FIELD2_TO_OFFICIAL_LEADING_BYTE 0x80 -#define FIELD2_TO_PRIVATE_LEADING_BYTE 0x80 + +#ifdef UTF2000 + +#define FIELD1_TO_PRIVATE_LEADING_BYTE 0x80 +#define FIELD1_TO_OFFICIAL_LEADING_BYTE 0x80 +#define FIELD2_TO_PRIVATE_LEADING_BYTE 0xb0 + +#else #define FIELD1_TO_PRIVATE_LEADING_BYTE 0xc0 #define FIELD1_TO_OFFICIAL_LEADING_BYTE 0x50 +#define FIELD2_TO_PRIVATE_LEADING_BYTE 0x80 + +#endif INLINE_HEADER Emchar CHAR_FIELD2 (Emchar c); INLINE_HEADER Emchar @@ -566,6 +618,12 @@ MAKE_CHAR (Lisp_Object charset, int c1, int c2) return MULE_CHAR_PRIVATE_OFFSET | ((XCHARSET_LEADING_BYTE (charset) - FIELD2_TO_OFFICIAL_LEADING_BYTE) << 7) | (c1); +#ifdef UTF2000 + else + return MULE_CHAR_PRIVATE_OFFSET + | ((XCHARSET_LEADING_BYTE (charset) - + FIELD1_TO_PRIVATE_LEADING_BYTE) << 14) | ((c1) << 7) | (c2); +#else else if (!XCHARSET_PRIVATE_P (charset)) return MULE_CHAR_PRIVATE_OFFSET | ((XCHARSET_LEADING_BYTE (charset) - @@ -574,6 +632,7 @@ MAKE_CHAR (Lisp_Object charset, int c1, int c2) return MULE_CHAR_PRIVATE_OFFSET | ((XCHARSET_LEADING_BYTE (charset) - FIELD1_TO_PRIVATE_LEADING_BYTE) << 14) | ((c1) << 7) | (c2); +#endif } /* The charset of character C is set to CHARSET, and the diff --git a/src/file-coding.c b/src/file-coding.c index c918dcd..215f21e 100644 --- a/src/file-coding.c +++ b/src/file-coding.c @@ -3820,6 +3820,7 @@ static void encode_coding_ucs4 (Lstream *encoding, const Bufbyte *src, unsigned_char_dynarr *dst, Lstream_data_count n) { +#ifndef UTF2000 struct encoding_stream *str = ENCODING_STREAM_DATA (encoding); unsigned int flags = str->flags; unsigned int ch = str->ch; @@ -3948,6 +3949,7 @@ encode_coding_ucs4 (Lstream *encoding, const Bufbyte *src, str->iso2022.current_charset = charset; /* Verbum caro factum est! */ +#endif } @@ -5307,7 +5309,11 @@ decode_coding_iso2022 (Lstream *decoding, const Extbyte *src, break; case 3: /* one-byte private or two-byte official */ +#ifdef UTF2000 + if (XCHARSET_DIMENSION (charset) == 1) +#else if (XCHARSET_PRIVATE_P (charset)) +#endif { DECODE_OUTPUT_PARTIAL_CHAR (ch); #ifdef UTF2000 diff --git a/src/mule-charset.c b/src/mule-charset.c index bc84a0b..ff4fceb 100644 --- a/src/mule-charset.c +++ b/src/mule-charset.c @@ -57,6 +57,13 @@ Lisp_Object Vcharset_korean_ksc5601; Lisp_Object Vcharset_japanese_jisx0212; Lisp_Object Vcharset_chinese_cns11643_1; Lisp_Object Vcharset_chinese_cns11643_2; +#ifdef UTF2000 +Lisp_Object Vcharset_chinese_cns11643_3; +Lisp_Object Vcharset_chinese_cns11643_4; +Lisp_Object Vcharset_chinese_cns11643_5; +Lisp_Object Vcharset_chinese_cns11643_6; +Lisp_Object Vcharset_chinese_cns11643_7; +#endif Lisp_Object Vcharset_chinese_big5_1; Lisp_Object Vcharset_chinese_big5_2; @@ -152,6 +159,13 @@ Lisp_Object Qascii, Qjapanese_jisx0212, Qchinese_cns11643_1, Qchinese_cns11643_2, +#ifdef UTF2000 + Qchinese_cns11643_3, + Qchinese_cns11643_4, + Qchinese_cns11643_5, + Qchinese_cns11643_6, + Qchinese_cns11643_7, +#endif Qchinese_big5_1, Qchinese_big5_2, Qcomposite; @@ -1408,6 +1422,13 @@ syms_of_mule_charset (void) defsymbol (&Qjapanese_jisx0212, "japanese-jisx0212"); defsymbol (&Qchinese_cns11643_1, "chinese-cns11643-1"); defsymbol (&Qchinese_cns11643_2, "chinese-cns11643-2"); +#ifdef UTF2000 + defsymbol (&Qchinese_cns11643_3, "chinese-cns11643-3"); + defsymbol (&Qchinese_cns11643_4, "chinese-cns11643-4"); + defsymbol (&Qchinese_cns11643_5, "chinese-cns11643-5"); + defsymbol (&Qchinese_cns11643_6, "chinese-cns11643-6"); + defsymbol (&Qchinese_cns11643_7, "chinese-cns11643-7"); +#endif defsymbol (&Qchinese_big5_1, "chinese-big5-1"); defsymbol (&Qchinese_big5_2, "chinese-big5-2"); @@ -1419,12 +1440,6 @@ syms_of_mule_charset (void) Version number of UTF-2000. */ ); #endif - - leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1; - DEFVAR_INT ("leading-code-private-11", &leading_code_private_11 /* -Leading-code of private TYPE9N charset of column-width 1. -*/ ); - leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1; } void @@ -1446,7 +1461,17 @@ vars_of_mule_charset (void) chlook->charset_by_attributes[i][j][k] = Qnil; chlook->next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1; +#ifdef UTF2000 + chlook->next_allocated_2_byte_leading_byte = LEADING_BYTE_CHINESE_BIG5_2 + 1; +#else chlook->next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2; +#endif + + leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1; + DEFVAR_INT ("leading-code-private-11", &leading_code_private_11 /* +Leading-code of private TYPE9N charset of column-width 1. +*/ ); + leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1; } void @@ -1653,6 +1678,58 @@ complex_vars_of_mule_charset (void) build_string ("CNS 11643 Plane 2 Chinese traditional"), build_string (CHINESE_CNS_PLANE_RE("2"))); +#if 0 /*def UTF2000 */ + staticpro (&Vcharset_chinese_cns11643_3); + Vcharset_chinese_cns11643_3 = + make_charset (LEADING_BYTE_CHINESE_CNS11643_3, Qchinese_cns11643_3, 3, + CHARSET_TYPE_94X94, 2, 0, 'I', + CHARSET_LEFT_TO_RIGHT, + build_string ("CNS11643-3"), + build_string ("CNS11643-3 (Chinese traditional)"), + build_string + ("CNS 11643 Plane 3 Chinese traditional"), + build_string (CHINESE_CNS_PLANE_RE("3"))); + staticpro (&Vcharset_chinese_cns11643_4); + Vcharset_chinese_cns11643_4 = + make_charset (LEADING_BYTE_CHINESE_CNS11643_4, Qchinese_cns11643_4, 3, + CHARSET_TYPE_94X94, 2, 0, 'J', + CHARSET_LEFT_TO_RIGHT, + build_string ("CNS11643-4"), + build_string ("CNS11643-4 (Chinese traditional)"), + build_string + ("CNS 11643 Plane 4 Chinese traditional"), + build_string (CHINESE_CNS_PLANE_RE("4"))); + staticpro (&Vcharset_chinese_cns11643_5); + Vcharset_chinese_cns11643_5 = + make_charset (LEADING_BYTE_CHINESE_CNS11643_5, Qchinese_cns11643_5, 3, + CHARSET_TYPE_94X94, 2, 0, 'K', + CHARSET_LEFT_TO_RIGHT, + build_string ("CNS11643-5"), + build_string ("CNS11643-5 (Chinese traditional)"), + build_string + ("CNS 11643 Plane 5 Chinese traditional"), + build_string (CHINESE_CNS_PLANE_RE("5"))); + staticpro (&Vcharset_chinese_cns11643_6); + Vcharset_chinese_cns11643_6 = + make_charset (LEADING_BYTE_CHINESE_CNS11643_6, Qchinese_cns11643_6, 3, + CHARSET_TYPE_94X94, 2, 0, 'L', + CHARSET_LEFT_TO_RIGHT, + build_string ("CNS11643-6"), + build_string ("CNS11643-6 (Chinese traditional)"), + build_string + ("CNS 11643 Plane 6 Chinese traditional"), + build_string (CHINESE_CNS_PLANE_RE("6"))); + staticpro (&Vcharset_chinese_cns11643_7); + Vcharset_chinese_cns11643_7 = + make_charset (LEADING_BYTE_CHINESE_CNS11643_7, Qchinese_cns11643_7, 3, + CHARSET_TYPE_94X94, 2, 0, 'M', + CHARSET_LEFT_TO_RIGHT, + build_string ("CNS11643-7"), + build_string ("CNS11643-7 (Chinese traditional)"), + build_string + ("CNS 11643 Plane 7 Chinese traditional"), + build_string (CHINESE_CNS_PLANE_RE("7"))); +#endif staticpro (&Vcharset_chinese_big5_1); Vcharset_chinese_big5_1 = make_charset (LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1, 3, -- 1.7.10.4