+1999-08-28 MORIOKA Tomohiko <tomo@etl.go.jp>
+
+ * mule/chinese.el (chinese-cns11643-3): Don't define if `utf-2000'
+ is provided because it is defined as builtin charset.
+ (chinese-cns11643-4): Likewise.
+ (chinese-cns11643-5): Likewise.
+ (chinese-cns11643-6): Likewise.
+ (chinese-cns11643-7): Likewise.
+
1999-06-17 MORIOKA Tomohiko <tomo@etl.go.jp>
* bytecomp.el (byte-compile-insert-header): Use utf-8 as
;; that appear once in some ancient manuscript and whose meaning
;; is unknown.
-(flet
- ((make-chinese-cns11643-charset
- (name plane final)
- (make-charset
- name (concat "CNS 11643 Plane " plane " (Chinese traditional)")
- `(registry
- ,(concat "CNS11643[.-]\\(.*[.-]\\)?" plane "$")
- dimension 2
- chars 94
- final ,final
- graphic 0))
- (modify-syntax-entry name "w")
- (modify-category-entry name ?t)
- ))
- (make-chinese-cns11643-charset 'chinese-cns11643-3 "3" ?I)
- (make-chinese-cns11643-charset 'chinese-cns11643-4 "4" ?J)
- (make-chinese-cns11643-charset 'chinese-cns11643-5 "5" ?K)
- (make-chinese-cns11643-charset 'chinese-cns11643-6 "6" ?L)
- (make-chinese-cns11643-charset 'chinese-cns11643-7 "7" ?M)
- )
+(unless (featurep 'utf-2000)
+ (flet
+ ((make-chinese-cns11643-charset
+ (name plane final)
+ (make-charset
+ name (concat "CNS 11643 Plane " plane " (Chinese traditional)")
+ `(registry
+ ,(concat "CNS11643[.-]\\(.*[.-]\\)?" plane "$")
+ dimension 2
+ chars 94
+ final ,final
+ graphic 0))
+ (modify-syntax-entry name "w")
+ (modify-category-entry name ?t)
+ ))
+ (make-chinese-cns11643-charset 'chinese-cns11643-3 "3" ?I)
+ (make-chinese-cns11643-charset 'chinese-cns11643-4 "4" ?J)
+ (make-chinese-cns11643-charset 'chinese-cns11643-5 "5" ?K)
+ (make-chinese-cns11643-charset 'chinese-cns11643-6 "6" ?L)
+ (make-chinese-cns11643-charset 'chinese-cns11643-7 "7" ?M)
+ ))
;; ISO-IR-165 (CCITT Extended GB)
;; It is based on CCITT Recommendation T.101, includes GB 2312-80 +
+1999-08-30 MORIOKA Tomohiko <tomo@etl.go.jp>
+
+ * character.h (LEADING_BYTE_COMPOSITE): Deleted.
+
+1999-08-29 MORIOKA Tomohiko <tomo@etl.go.jp>
+
+ * character.h (REP_BYTES_BY_FIRST_BYTE): Reverse the order of the
+ conditions so that the smallest first-byte range is tested first.
+
+1999-08-28 MORIOKA Tomohiko <tomo@etl.go.jp>
+
+ * character.h (LEADING_BYTE_PRIVATE_P): Don't define in UTF2000.
+ (CHARSET_PRIVATE_P): Likewise.
+ (XCHARSET_PRIVATE_P): Likewise.
+ (MAKE_CHAR): Don't use XCHARSET_PRIVATE_P in UTF2000.
+
+ * file-coding.c (encode_coding_ucs4): Don't implement in UTF2000.
+ (decode_coding_iso2022): Don't use XCHARSET_PRIVATE_P in UTF2000.
+
+1999-08-28 MORIOKA Tomohiko <tomo@etl.go.jp>
+
+ * character.h (LEADING_BYTE_*): Changed in UTF2000.
+ (NUM_LEADING_BYTES): Changed from 128 to 256.
+ (FIELD1_TO_PRIVATE_LEADING_BYTE): Change value to 0x80 in UTF2000.
+ (FIELD1_TO_OFFICIAL_LEADING_BYTE): Change value to 0x80 in
+ UTF2000.
+ (FIELD2_TO_PRIVATE_LEADING_BYTE): Change value to 0xb0 in UTF2000.
+
+ * mule-charset.c (Vcharset_chinese_cns11643_3): New variable in
+ UTF2000.
+ (Vcharset_chinese_cns11643_4): New variable in UTF2000.
+ (Vcharset_chinese_cns11643_5): New variable in UTF2000.
+ (Vcharset_chinese_cns11643_6): New variable in UTF2000.
+ (Vcharset_chinese_cns11643_7): New variable in UTF2000.
+ (Qchinese_cns11643_3): New variable in UTF2000.
+ (Qchinese_cns11643_4): New variable in UTF2000.
+ (Qchinese_cns11643_5): New variable in UTF2000.
+ (Qchinese_cns11643_6): New variable in UTF2000.
+ (Qchinese_cns11643_7): New variable in UTF2000.
+ (syms_of_mule_charset): Define `chinese-cns11643-3',
+ `chinese-cns11643-4', `chinese-cns11643-5', `chinese-cns11643-6'
+ and `chinese-cns11643-7' in UTF2000.
+ (vars_of_mule_charset): Initialize
+ next_allocated_2_byte_leading_byte to LEADING_BYTE_CHINESE_BIG5_2
+ + 1 in UTF2000.
+ (complex_vars_of_mule_charset): Add setup code for charset
+ `chinese-cns11643-3', `chinese-cns11643-4', `chinese-cns11643-5',
+ `chinese-cns11643-6' and `chinese-cns11643-7' for UTF2000
+ (currently disabled by `#if 0').
+
1999-08-27 MORIOKA Tomohiko <tomo@etl.go.jp>
* mule-charset.c: Move setting for `leading-code-private-11' from
#define MIN_LEADING_BYTE 0x80
/* These need special treatment in a string and/or character */
#define LEADING_BYTE_ASCII 0x8E /* Omitted in a buffer */
-#ifdef ENABLE_COMPOSITE_CHARS
-#endif
-#define LEADING_BYTE_COMPOSITE 0x80 /* for a composite character */
#define LEADING_BYTE_CONTROL_1 0x8F /* represent normal 80-9F */
/* Note the gap in each official charset can cause core dump
#define MIN_LEADING_BYTE_OFFICIAL_1 LEADING_BYTE_LATIN_ISO8859_1
#define MAX_LEADING_BYTE_OFFICIAL_1 LEADING_BYTE_LATIN_ISO8859_9
+#ifdef UTF2000
+
+#define LEADING_BYTE_CHINESE_BIG5_1 0xB0 /* Big5 Level 1 */
+#define LEADING_BYTE_CHINESE_BIG5_2 0xB1 /* Big5 Level 2 */
+#define MIN_LEADING_BYTE_PRIVATE_2 0xB0
+#define MAX_LEADING_BYTE_PRIVATE_2 0xBF
+
+/** The following are for 2-byte characters in an official charset. **/
+
+#define LEADING_BYTE_JAPANESE_JISX0208_1978 0xC0/* Japanese JIS X0208-1978 */
+#define LEADING_BYTE_CHINESE_GB2312 0xC1 /* Chinese Hanzi GB2312-1980 */
+#define LEADING_BYTE_JAPANESE_JISX0208 0xC2 /* Japanese JIS X0208-1983 */
+#define LEADING_BYTE_KOREAN_KSC5601 0xC3 /* Hangul KS C5601-1987 */
+#define LEADING_BYTE_JAPANESE_JISX0212 0xC4 /* Japanese JIS X0212-1990 */
+#define LEADING_BYTE_CHINESE_CCITT_GB 0xC5 /* CCITT Extended GB */
+#define LEADING_BYTE_CHINESE_CNS11643_1 0xC7 /* Chinese CNS11643 Set 1 */
+#define LEADING_BYTE_CHINESE_CNS11643_2 0xC8 /* Chinese CNS11643 Set 2 */
+#define LEADING_BYTE_CHINESE_CNS11643_3 0xC9 /* Chinese CNS11643 Set 3 */
+#define LEADING_BYTE_CHINESE_CNS11643_4 0xCA /* Chinese CNS11643 Set 4 */
+#define LEADING_BYTE_CHINESE_CNS11643_5 0xCB /* Chinese CNS11643 Set 5 */
+#define LEADING_BYTE_CHINESE_CNS11643_6 0xCC /* Chinese CNS11643 Set 6 */
+#define LEADING_BYTE_CHINESE_CNS11643_7 0xCD /* Chinese CNS11643 Set 7 */
+#define LEADING_BYTE_KOREAN_KPS9566 0xCE /* DPRK Hangul KPS 9566-1997 */
+
+#define MIN_LEADING_BYTE_OFFICIAL_2 LEADING_BYTE_JAPANESE_JISX0208_1978
+#define MAX_LEADING_BYTE_OFFICIAL_2 LEADING_BYTE_KOREAN_KPS9566
+
+/** The following are for 1- and 2-byte characters in a private charset. **/
+
+#define PRE_LEADING_BYTE_PRIVATE_1 0x120 /* 1-byte char-set */
+#define PRE_LEADING_BYTE_PRIVATE_2 0x121 /* 2-byte char-set */
+
+#define MIN_LEADING_BYTE_PRIVATE_1 0x0D0
+#define MAX_LEADING_BYTE_PRIVATE_1 0x11f
+
+#else
+
/** The following are for 2-byte characters in an official charset. **/
enum LEADING_BYTE_OFFICIAL_2
{
#define MIN_LEADING_BYTE_PRIVATE_2 0xF0
#define MAX_LEADING_BYTE_PRIVATE_2 0xFF
-#define NUM_LEADING_BYTES 128
+#endif
+
+#define NUM_LEADING_BYTES 256
\f
/************************************************************************/
/* Is this leading byte for a private charset? */
+#ifndef UTF2000
#define LEADING_BYTE_PRIVATE_P(lb) ((lb) >= MIN_LEADING_BYTE_PRIVATE_1)
+#endif
/* Is this a prefix for a private leading byte? */
#define CHARSET_REVERSE_DIRECTION_CHARSET(cs) ((cs)->reverse_direction_charset)
+#ifdef LEADING_BYTE_PRIVATE_P
#define CHARSET_PRIVATE_P(cs) LEADING_BYTE_PRIVATE_P (CHARSET_LEADING_BYTE (cs))
+#endif
#define XCHARSET_ID(cs) CHARSET_ID (XCHARSET (cs))
#define XCHARSET_NAME(cs) CHARSET_NAME (XCHARSET (cs))
#define XCHARSET_CCL_PROGRAM(cs) CHARSET_CCL_PROGRAM (XCHARSET (cs))
#define XCHARSET_DIMENSION(cs) CHARSET_DIMENSION (XCHARSET (cs))
#define XCHARSET_CHARS(cs) CHARSET_CHARS (XCHARSET (cs))
+#ifdef CHARSET_PRIVATE_P
#define XCHARSET_PRIVATE_P(cs) CHARSET_PRIVATE_P (XCHARSET (cs))
+#endif
#define XCHARSET_REVERSE_DIRECTION_CHARSET(cs) \
CHARSET_REVERSE_DIRECTION_CHARSET (XCHARSET (cs))
Bufbyte next_allocated_2_byte_leading_byte;
};
-INLINE_HEADER Lisp_Object CHARSET_BY_LEADING_BYTE (Bufbyte lb);
+INLINE_HEADER Lisp_Object CHARSET_BY_LEADING_BYTE (int lb);
INLINE_HEADER Lisp_Object
-CHARSET_BY_LEADING_BYTE (Bufbyte lb)
+CHARSET_BY_LEADING_BYTE (int lb)
{
extern struct charset_lookup *chlook;
following unless we introduce `tem'. */
int tem = lb;
type_checking_assert (tem >= MIN_LEADING_BYTE &&
- tem <= (MIN_LEADING_BYTE + NUM_LEADING_BYTES));
+ tem < (MIN_LEADING_BYTE + NUM_LEADING_BYTES));
#endif
- return chlook->charset_by_leading_byte[lb - 128];
+ return chlook->charset_by_leading_byte[lb - MIN_LEADING_BYTE];
}
INLINE_HEADER Lisp_Object
INLINE_HEADER int
REP_BYTES_BY_FIRST_BYTE (int fb)
{
- if ( fb >= 0xfc )
- return 6;
- else if ( fb >= 0xf8 )
- return 5;
- else if ( fb >= 0xf0 )
- return 4;
- else if ( fb >= 0xe0 )
- return 3;
- else if ( fb >= 0xc0 )
+ if ( fb < 0xc0 )
+ return 1;
+ else if ( fb < 0xe0 )
return 2;
+ else if ( fb < 0xf0 )
+ return 3;
+ else if ( fb < 0xf8 )
+ return 4;
+ else if ( fb < 0xfc )
+ return 5;
else
- return 1;
+ return 6;
}
#else /* MULE */
INLINE_HEADER int REP_BYTES_BY_FIRST_BYTE (int fb);
/* Converting between field values and leading bytes. */
#define FIELD2_TO_OFFICIAL_LEADING_BYTE 0x80
-#define FIELD2_TO_PRIVATE_LEADING_BYTE 0x80
+
+#ifdef UTF2000
+
+#define FIELD1_TO_PRIVATE_LEADING_BYTE 0x80
+#define FIELD1_TO_OFFICIAL_LEADING_BYTE 0x80
+#define FIELD2_TO_PRIVATE_LEADING_BYTE 0xb0
+
+#else
#define FIELD1_TO_PRIVATE_LEADING_BYTE 0xc0
#define FIELD1_TO_OFFICIAL_LEADING_BYTE 0x50
+#define FIELD2_TO_PRIVATE_LEADING_BYTE 0x80
+
+#endif
INLINE_HEADER Emchar CHAR_FIELD2 (Emchar c);
INLINE_HEADER Emchar
return MULE_CHAR_PRIVATE_OFFSET
| ((XCHARSET_LEADING_BYTE (charset) -
FIELD2_TO_OFFICIAL_LEADING_BYTE) << 7) | (c1);
+#ifdef UTF2000
+ else
+ return MULE_CHAR_PRIVATE_OFFSET
+ | ((XCHARSET_LEADING_BYTE (charset) -
+ FIELD1_TO_PRIVATE_LEADING_BYTE) << 14) | ((c1) << 7) | (c2);
+#else
else if (!XCHARSET_PRIVATE_P (charset))
return MULE_CHAR_PRIVATE_OFFSET
| ((XCHARSET_LEADING_BYTE (charset) -
return MULE_CHAR_PRIVATE_OFFSET
| ((XCHARSET_LEADING_BYTE (charset) -
FIELD1_TO_PRIVATE_LEADING_BYTE) << 14) | ((c1) << 7) | (c2);
+#endif
}
/* The charset of character C is set to CHARSET, and the
encode_coding_ucs4 (Lstream *encoding, const Bufbyte *src,
unsigned_char_dynarr *dst, Lstream_data_count n)
{
+#ifndef UTF2000
struct encoding_stream *str = ENCODING_STREAM_DATA (encoding);
unsigned int flags = str->flags;
unsigned int ch = str->ch;
str->iso2022.current_charset = charset;
/* Verbum caro factum est! */
+#endif
}
\f
break;
case 3: /* one-byte private or two-byte official */
+#ifdef UTF2000
+ if (XCHARSET_DIMENSION (charset) == 1)
+#else
if (XCHARSET_PRIVATE_P (charset))
+#endif
{
DECODE_OUTPUT_PARTIAL_CHAR (ch);
#ifdef UTF2000
Lisp_Object Vcharset_japanese_jisx0212;
Lisp_Object Vcharset_chinese_cns11643_1;
Lisp_Object Vcharset_chinese_cns11643_2;
+#ifdef UTF2000
+Lisp_Object Vcharset_chinese_cns11643_3;
+Lisp_Object Vcharset_chinese_cns11643_4;
+Lisp_Object Vcharset_chinese_cns11643_5;
+Lisp_Object Vcharset_chinese_cns11643_6;
+Lisp_Object Vcharset_chinese_cns11643_7;
+#endif
Lisp_Object Vcharset_chinese_big5_1;
Lisp_Object Vcharset_chinese_big5_2;
Qjapanese_jisx0212,
Qchinese_cns11643_1,
Qchinese_cns11643_2,
+#ifdef UTF2000
+ Qchinese_cns11643_3,
+ Qchinese_cns11643_4,
+ Qchinese_cns11643_5,
+ Qchinese_cns11643_6,
+ Qchinese_cns11643_7,
+#endif
Qchinese_big5_1,
Qchinese_big5_2,
Qcomposite;
defsymbol (&Qjapanese_jisx0212, "japanese-jisx0212");
defsymbol (&Qchinese_cns11643_1, "chinese-cns11643-1");
defsymbol (&Qchinese_cns11643_2, "chinese-cns11643-2");
+#ifdef UTF2000
+ defsymbol (&Qchinese_cns11643_3, "chinese-cns11643-3");
+ defsymbol (&Qchinese_cns11643_4, "chinese-cns11643-4");
+ defsymbol (&Qchinese_cns11643_5, "chinese-cns11643-5");
+ defsymbol (&Qchinese_cns11643_6, "chinese-cns11643-6");
+ defsymbol (&Qchinese_cns11643_7, "chinese-cns11643-7");
+#endif
defsymbol (&Qchinese_big5_1, "chinese-big5-1");
defsymbol (&Qchinese_big5_2, "chinese-big5-2");
Version number of UTF-2000.
*/ );
#endif
-
- leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
- DEFVAR_INT ("leading-code-private-11", &leading_code_private_11 /*
-Leading-code of private TYPE9N charset of column-width 1.
-*/ );
- leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
}
void
chlook->charset_by_attributes[i][j][k] = Qnil;
chlook->next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1;
+#ifdef UTF2000
+ chlook->next_allocated_2_byte_leading_byte = LEADING_BYTE_CHINESE_BIG5_2 + 1;
+#else
chlook->next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2;
+#endif
+
+ leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
+ DEFVAR_INT ("leading-code-private-11", &leading_code_private_11 /*
+Leading-code of private TYPE9N charset of column-width 1.
+*/ );
+ leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
}
void
build_string
("CNS 11643 Plane 2 Chinese traditional"),
build_string (CHINESE_CNS_PLANE_RE("2")));
+#if 0 /*def UTF2000 */
+ staticpro (&Vcharset_chinese_cns11643_3);
+ Vcharset_chinese_cns11643_3 =
+ make_charset (LEADING_BYTE_CHINESE_CNS11643_3, Qchinese_cns11643_3, 3,
+ CHARSET_TYPE_94X94, 2, 0, 'I',
+ CHARSET_LEFT_TO_RIGHT,
+ build_string ("CNS11643-3"),
+ build_string ("CNS11643-3 (Chinese traditional)"),
+ build_string
+ ("CNS 11643 Plane 3 Chinese traditional"),
+ build_string (CHINESE_CNS_PLANE_RE("3")));
+ staticpro (&Vcharset_chinese_cns11643_4);
+ Vcharset_chinese_cns11643_4 =
+ make_charset (LEADING_BYTE_CHINESE_CNS11643_4, Qchinese_cns11643_4, 3,
+ CHARSET_TYPE_94X94, 2, 0, 'J',
+ CHARSET_LEFT_TO_RIGHT,
+ build_string ("CNS11643-4"),
+ build_string ("CNS11643-4 (Chinese traditional)"),
+ build_string
+ ("CNS 11643 Plane 4 Chinese traditional"),
+ build_string (CHINESE_CNS_PLANE_RE("4")));
+ staticpro (&Vcharset_chinese_cns11643_5);
+ Vcharset_chinese_cns11643_5 =
+ make_charset (LEADING_BYTE_CHINESE_CNS11643_5, Qchinese_cns11643_5, 3,
+ CHARSET_TYPE_94X94, 2, 0, 'K',
+ CHARSET_LEFT_TO_RIGHT,
+ build_string ("CNS11643-5"),
+ build_string ("CNS11643-5 (Chinese traditional)"),
+ build_string
+ ("CNS 11643 Plane 5 Chinese traditional"),
+ build_string (CHINESE_CNS_PLANE_RE("5")));
+ staticpro (&Vcharset_chinese_cns11643_6);
+ Vcharset_chinese_cns11643_6 =
+ make_charset (LEADING_BYTE_CHINESE_CNS11643_6, Qchinese_cns11643_6, 3,
+ CHARSET_TYPE_94X94, 2, 0, 'L',
+ CHARSET_LEFT_TO_RIGHT,
+ build_string ("CNS11643-6"),
+ build_string ("CNS11643-6 (Chinese traditional)"),
+ build_string
+ ("CNS 11643 Plane 6 Chinese traditional"),
+ build_string (CHINESE_CNS_PLANE_RE("6")));
+ staticpro (&Vcharset_chinese_cns11643_7);
+ Vcharset_chinese_cns11643_7 =
+ make_charset (LEADING_BYTE_CHINESE_CNS11643_7, Qchinese_cns11643_7, 3,
+ CHARSET_TYPE_94X94, 2, 0, 'M',
+ CHARSET_LEFT_TO_RIGHT,
+ build_string ("CNS11643-7"),
+ build_string ("CNS11643-7 (Chinese traditional)"),
+ build_string
+ ("CNS 11643 Plane 7 Chinese traditional"),
+ build_string (CHINESE_CNS_PLANE_RE("7")));
+#endif
staticpro (&Vcharset_chinese_big5_1);
Vcharset_chinese_big5_1 =
make_charset (LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1, 3,