From 6b57beba75b3e9c5209fa23f45f3e57900cdc8af Mon Sep 17 00:00:00 2001 From: tomo Date: Mon, 16 Dec 2002 05:06:18 +0000 Subject: [PATCH] Basically sync with r21-2-19-utf-2000-0_7-0. --- lisp/ChangeLog | 15 + lisp/custom-load.el | 2 +- lisp/mule/mule-charset.el | 23 ++ src/ChangeLog | 238 ++++++++++++++ src/char-lb.h | 2 + src/char-ucs.h | 285 ++++------------- src/depend | 1 + src/file-coding.c | 74 ++--- src/file-coding.h | 4 - src/indent.c | 6 +- src/insdel.c | 4 +- src/mule-charset.c | 715 ++++++++++++++--------------------------- src/objects-tty.c | 2 +- src/text-coding.c | 782 +++++++++++---------------------------------- 14 files changed, 808 insertions(+), 1345 deletions(-) diff --git a/lisp/ChangeLog b/lisp/ChangeLog index b78dd2f..84c6eb5 100644 --- a/lisp/ChangeLog +++ b/lisp/ChangeLog @@ -1,3 +1,18 @@ +1999-09-14 MORIOKA Tomohiko + + * mule/mule-charset.el (default-coded-charset-priority-list): Add + thai-tis620 and hebrew-iso8859-8; namely all BMP are covered. + +1999-09-13 MORIOKA Tomohiko + + * mule/mule-charset.el (default-coded-charset-priority-list): Add + `cyrillic-iso8859-5' and `greek-iso8859-7'. + +1999-09-13 MORIOKA Tomohiko + + * mule/mule-charset.el (default-coded-charset-priority-list): + Setup in UTF-2000. 
+ 1999-09-09 MORIOKA Tomohiko * mule/viet-chars.el: Don't define charset diff --git a/lisp/custom-load.el b/lisp/custom-load.el index 7f0c64b..c7bddd7 100644 --- a/lisp/custom-load.el +++ b/lisp/custom-load.el @@ -73,7 +73,7 @@ (custom-add-loads 'fill '("simple" "fill")) (custom-add-loads 'custom-magic-faces '("cus-edit")) (custom-add-loads 'display '("modeline" "toolbar" "scrollbar" "auto-show")) -(custom-add-loads 'faces '("faces" "font" "cus-edit" "font-lock" "hyper-apropos" "info" "wid-edit")) +(custom-add-loads 'faces '("faces" "cus-edit" "font-lock" "font" "hyper-apropos" "info" "wid-edit")) (custom-add-loads 'emacs '("faces" "help" "files" "cus-edit" "package-get")) (custom-add-loads 'processes '("process" "gnuserv")) (custom-add-loads 'hyper-apropos '("hyper-apropos")) diff --git a/lisp/mule/mule-charset.el b/lisp/mule/mule-charset.el index 9dd8fef..bd61908 100644 --- a/lisp/mule/mule-charset.el +++ b/lisp/mule/mule-charset.el @@ -3,6 +3,8 @@ ;; Copyright (C) 1992 Free Software Foundation, Inc. ;; Copyright (C) 1995 Amdahl Corporation. ;; Copyright (C) 1996 Sun Microsystems. +;; Copyright (C) 1999 Electrotechnical Laboratory, JAPAN. +;; Licensed to the Free Software Foundation. ;; Author: Unknown ;; Keywords: i18n, mule, internal @@ -258,4 +260,25 @@ DESCRIPTION (string) is the description string of the charset." 
(put-char-table (car l) t auto-fill-chars) (setq l (cdr l)))) + +;;; @ Coded character set +;;; + +(when (featurep 'utf-2000) + (setq default-coded-charset-priority-list + '(ascii + control-1 + latin-iso8859-1 + latin-iso8859-2 + latin-iso8859-3 + latin-iso8859-4 + latin-iso8859-9 + latin-jisx0201 + cyrillic-iso8859-5 + greek-iso8859-7 + thai-tis620 + hebrew-iso8859-8 + vietnamese-viscii-lower + vietnamese-viscii-upper))) + ;;; mule-charset.el ends here diff --git a/src/ChangeLog b/src/ChangeLog index a41ff49..91959cb 100644 --- a/src/ChangeLog +++ b/src/ChangeLog @@ -1,3 +1,241 @@ +1999-09-14 MORIOKA Tomohiko + + * char-ucs.h (breakup_char_1): Use + `Vdefault_coded_charset_priority_list' for hebrew-iso8859-8, + thai-tis620 and katakana-jisx0201 area. + +1999-09-13 MORIOKA Tomohiko + + * char-ucs.h (breakup_char_1): Use + `Vdefault_coded_charset_priority_list' for cyrillic-iso8859-5 + area. + + * text-coding.c (reset_encoding_stream): Fixed. + (char_encode_ucs4): Delete `& 255'. + + * char-ucs.h (breakup_char_1): Use + `Vdefault_coded_charset_priority_list' for greek-iso8859-7 area. + +1999-09-13 MORIOKA Tomohiko + + * file-coding.c (Fmake_coding_system): Don't set up + `codesys->fixed.size'. + (encode_coding_no_conversion): Don't refer + `str->codesys->fixed.size'. + +1999-09-13 MORIOKA Tomohiko + + * mule-charset.c, char-ucs.h (latin_a_char_to_charset): Deleted. + (latin_a_char_to_byte1): Deleted. + (latin_a_char_to_byte2): Deleted. + +1999-09-13 MORIOKA Tomohiko + + * mule-charset.c (make_charset): Add new argument `ucs_min', + `ucs_max' and `code_offset'. + (charset_get_byte1): New implementation [delete specific charset + depended implementations]. + (Fmake_charset): Modify for `make_charset'. + (Fmake_reverse_direction_charset): Likewise. + (complex_vars_of_mule_charset): Likewise. + + * char-ucs.h (struct Lisp_Charset): Add `ucs_min', `ucs_max' and + `code_offset'. + (CHARSET_UCS_MIN): New macro. + (CHARSET_UCS_MAX): New macro. 
+ (CHARSET_CODE_OFFSET): New macro. + (MAKE_CHAR): Delete charset depended definitions [except + katakana-jisx0201]. + +1999-09-13 MORIOKA Tomohiko + + * char-ucs.h (breakup_char_1): Use + `Vdefault_coded_charset_priority_list' for C0-Controls, + Basic-Latin, C1-Controls and Latin-1-Supplement area. + +1999-09-13 MORIOKA Tomohiko + + * char-ucs.h (charset_get_byte1): New function. + (XCHARSET_GET_BYTE1): Deleted. + (charset_get_byte2): New function. + (XCHARSET_GET_BYTE2): Deleted. + (Vdefault_coded_charset_priority_list): New external variable. + (breakup_char_1): Use `charset_get_byte1', `charset_get_byte2' and + `Vdefault_preferred_coded_charset_list'. + + * mule-charset.c (charset_get_byte1): New function. + (charset_get_byte2): New function. + (Vdefault_coded_charset_priority_list): New variable. + (vars_of_mule_charset): Add new variable + `default-coded-charset-priority-list'. + +1999-09-12 MORIOKA Tomohiko + + * char-ucs.h (XCHARSET_GET_BYTE1): New inline function. + (XCHARSET_GET_BYTE2): New inline function. + (breakup_char_1): Use `XCHARSET_GET_BYTE1' and + `XCHARSET_GET_BYTE2'. + +1999-09-12 MORIOKA Tomohiko + + * mule-charset.c (make_charset): Initialize + `CHARSET_TO_BYTE1_TABLE(cs)' and `CHARSET_TO_BYTE2_TABLE(cs)' by + NULL if table is not defined. + +1999-09-11 MORIOKA Tomohiko + + * text-coding.c (char_encode_shift_jis): Use + `XCHARSET_TO_BYTE1_TABLE' for `Vcharset_latin_jisx0201' instead of + `ucs_to_latin_jisx0201'. + + * mule-charset.c (ucs_to_latin_jisx0201): Deleted. + (ucs_to_latin_iso8859_2): Deleted. + (ucs_to_latin_iso8859_3): Deleted. + (ucs_to_latin_iso8859_4): Deleted. + (ucs_to_latin_iso8859_9): Deleted. + (ucs_to_latin_viscii_lower): Deleted. + (ucs_to_latin_viscii_upper): Deleted. + (ucs_to_latin_tcvn5712): Deleted. + (make_charset): Add new argument `decoding_table'; set up + `CHARSET_DECODING_TABLE(cs)' in UTF-2000; set up + `CHARSET_TO_BYTE1_TABLE(cs)' for 94-set and 96-set if + `decoding_table' is defined in UTF-2000. 
+ (Fmake_charset): Modify for `make_charset'. + (Fmake_reverse_direction_charset): Likewise. + (complex_vars_of_mule_charset): Likewise; delete `GENERATE_94_SET' + and `GENERATE_96_SET'. + + * char-ucs.h (latin_jisx0201_to_ucs): Deleted. + (ucs_to_latin_jisx0201): Deleted. + (latin_iso8859_2_to_ucs): Deleted. + (ucs_to_latin_iso8859_2): Deleted. + (latin_iso8859_3_to_ucs): Deleted. + (ucs_to_latin_iso8859_3): Deleted. + (latin_iso8859_4_to_ucs): Deleted. + (ucs_to_latin_iso8859_4): Deleted. + (latin_iso8859_9_to_ucs): Deleted. + (ucs_to_latin_iso8859_9): Deleted. + (latin_viscii_lower_to_ucs): Deleted. + (ucs_to_latin_viscii_lower): Deleted. + (latin_viscii_upper_to_ucs): Deleted. + (ucs_to_latin_viscii_upper): Deleted. + (struct Lisp_Charset): Renamed `encoding_table' to + `to_byte1_table'; add `to_byte2_table'. + (CHARSET_DECODING_TABLE): New macro. + (CHARSET_TO_BYTE1_TABLE): New macro. + (CHARSET_TO_BYTE2_TABLE): New macro. + (XCHARSET_DECODING_TABLE): New macro. + (XCHARSET_TO_BYTE1_TABLE): New macro. + (XCHARSET_TO_BYTE2_TABLE): New macro. + (MAKE_CHAR): Use `XCHARSET_DECODING_TABLE'; don't use `*_to_ucs' + tables. + (breakup_char_1): Use `XCHARSET_TO_BYTE1_TABLE' if it is defined; + don't use `ucs_to_*' tables. + +1999-09-11 MORIOKA Tomohiko + + * text-coding.c (Fmake_coding_system): Don't set up + `codesys->fixed.size'. + (encode_coding_no_conversion): Use `if' instead of `switch'. + + * file-coding.h (struct Lisp_Coding_System): Delete `fixed.size'. + +1999-09-11 MORIOKA Tomohiko + + * mule-charset.c (make_charset): Delete argument `rep_bytes'. + (Fmake_charset): Modify for `make_charset'. + (Fmake_reverse_direction_charset): Likewise. + (complex_vars_of_mule_charset): Likewise. + +1999-09-11 MORIOKA Tomohiko + + * text-coding.c (char_encode_shift_jis): Use table + `ucs_to_latin_jisx0201' and BREAKUP_CHAR. + +1999-09-11 MORIOKA Tomohiko + + * text-coding.c (text_encode_generic): Use `if' instead of + `switch'. 
+ (decode_coding_sjis): Use `MAKE_CHAR' and `DECODE_ADD_UCS_CHAR' to + decode JIS-Latin. + +1999-09-10 MORIOKA Tomohiko + + * text-coding.c (encode_coding_sjis): Deleted. + (char_encode_shift_jis): New function. + (char_finish_shift_jis): New function. + (reset_encoding_stream): Set up `encode_char' and `finish' for + `CODESYS_UCS4' and `CODESYS_SHIFT_JIS'. + (mule_encode): Use generic encoder for `CODESYS_SHIFT_JIS'. + (char_encode_utf8): Treat `eol_type'. + +1999-09-10 MORIOKA Tomohiko + + * file-coding.c (decode_coding_iso2022): Use + `DECODE_ADD_UCS_CHAR'; don't use `XCHARSET_REP_BYTES'. + +1999-09-10 MORIOKA Tomohiko + + * mule-charset.c (vars_of_mule_charset): Update `utf-2000-version' + to 0.7 (Hirano). + +1999-09-10 MORIOKA Tomohiko + + * char-lb.h (CHAR_COLUMNS): New macro. + +1999-09-09 MORIOKA Tomohiko + + * text-coding.c (char_encode_ucs4): New function. + (char_finish_ucs4): New function. + (encode_coding_ucs4): Deleted. + (mule_encode): Use generic encoder for `CODESYS_UCS4'. + (text_encode_generic): Delete local variable `charset' and `half'. + (ucs_to_mule_table): Deleted. + (mule_to_ucs_table): Deleted. + (Fset_ucs_char): Deleted. + (ucs_to_char): Deleted. + (Fucs_char): Deleted. + (Fset_char_ucs): Deleted. + (Fchar_ucs): Deleted. + (decode_ucs4): Deleted. + (mule_char_to_ucs4): Deleted. + (encode_ucs4): Deleted. + (decode_coding_ucs4): Use `DECODE_ADD_UCS_CHAR'. + (decode_coding_utf8): Likewise. + (decode_coding_iso2022): Likewise; don't use `XCHARSET_REP_BYTES'. + (char_encode_iso2022): Fixed. + (syms_of_file_coding): Delete `Fset_ucs_char', `Fucs_char', + `Fset_char_ucs' and `Fchar_ucs'. + (complex_vars_of_file_coding): Don't initialize + `ucs_to_mule_table'. + + * objects-tty.c (tty_initialize_font_instance): Don't use + `XCHARSET_COLUMNS'. + + * mule-charset.c (make_charset): Don't set up CHARSET_REP_BYTES in + UTF-2000. + + * redisplay-tty.c (tty_output_display_block): Use `CHAR_COLUMNS' + instead of `XCHARSET_COLUMNS' and `CHAR_CHARSET'. 
+ + * insdel.c (bufbyte_string_displayed_columns): Use `CHAR_COLUMNS' + instead of `XCHARSET_COLUMNS' and `CHAR_CHARSET'. + (emchar_string_displayed_columns): Likewise. + + * indent.c (column_at_point): Use `CHAR_COLUMNS' instead of + `XCHARSET_COLUMNS' and `CHAR_CHARSET'. + (string_column_at_point): Likewise. + (Fmove_to_column): Likewise. + + * char-ucs.h (struct Lisp_Charset): Delete `rep_bytes'; add + `encoding_table' and `decoding_table'. + (CHARSET_REP_BYTES): Deleted. + (XCHARSET_REP_BYTES): Deleted. + (XCHARSET_COLUMNS): Deleted. + (CHAR_COLUMNS): New macro. + (lookup_composite_char): Deleted unconditionally. + (composite_char_string): Likewise. + 1999-09-09 MORIOKA Tomohiko * char-ucs.h (Emchar_to_byte_table): New type. diff --git a/src/char-lb.h b/src/char-lb.h index a9bbce9..01ec5d5 100644 --- a/src/char-lb.h +++ b/src/char-lb.h @@ -35,4 +35,6 @@ valid_char_p (Emchar ch) return ((unsigned int) (ch) <= 0xff) || non_ascii_valid_char_p (ch); } +#define CHAR_COLUMNS(c) (XCHARSET_COLUMNS(CHAR_CHARSET(c))) + #endif /* _XEMACS_CHAR_LB_H */ diff --git a/src/char-ucs.h b/src/char-ucs.h index 9482331..0418aea 100644 --- a/src/char-ucs.h +++ b/src/char-ucs.h @@ -29,32 +29,17 @@ Boston, MA 02111-1307, USA. 
*/ #define CHAR_ASCII_P(ch) ((ch) <= 0x7F) -unsigned char -get_byte_from_character_table (Emchar ch, - Emchar* table, size_t size, unsigned char offset); +int +get_byte_from_character_table (Emchar ch, Lisp_Object ccs); extern Lisp_Object Vcharset_ucs_bmp; - extern Lisp_Object Vcharset_latin_jisx0201; -extern Emchar latin_jisx0201_to_ucs[94]; - extern Lisp_Object Vcharset_latin_iso8859_2; -extern Emchar latin_iso8859_2_to_ucs[96]; - extern Lisp_Object Vcharset_latin_iso8859_3; -extern Emchar latin_iso8859_3_to_ucs[96]; - extern Lisp_Object Vcharset_latin_iso8859_4; -extern Emchar latin_iso8859_4_to_ucs[96]; - extern Lisp_Object Vcharset_latin_iso8859_9; -extern Emchar latin_iso8859_9_to_ucs[96]; - extern Lisp_Object Vcharset_latin_viscii_lower; -extern Emchar latin_viscii_lower_to_ucs[96]; - extern Lisp_Object Vcharset_latin_viscii_upper; -extern Emchar latin_viscii_upper_to_ucs[96]; /************************************************************************/ @@ -206,11 +191,6 @@ struct Lisp_Charset /* Final byte of this character set in ISO2022 designating escape sequence */ Bufbyte final; - /* Number of bytes (1 - 4) required in the internal representation - for characters in this character set. This is *not* the - same as the dimension of the character set). */ - unsigned int rep_bytes; - /* Number of columns a character in this charset takes up, on TTY devices. Not used for X devices. 
*/ unsigned int columns; @@ -229,6 +209,15 @@ struct Lisp_Charset /* Which half of font to be used to display this character set */ unsigned int graphic; + + /* Byte->character mapping table */ + Emchar* decoding_table; + + /* Range of character code */ + Emchar ucs_min, ucs_max; + + /* Offset for external representation */ + Emchar code_offset; }; typedef struct Lisp_Charset Lisp_Charset; @@ -255,7 +244,6 @@ DECLARE_LRECORD (charset, Lisp_Charset); #define CHARSET_NAME(cs) ((cs)->name) #define CHARSET_SHORT_NAME(cs) ((cs)->short_name) #define CHARSET_LONG_NAME(cs) ((cs)->long_name) -#define CHARSET_REP_BYTES(cs) ((cs)->rep_bytes) #define CHARSET_COLUMNS(cs) ((cs)->columns) #define CHARSET_GRAPHIC(cs) ((cs)->graphic) #define CHARSET_TYPE(cs) ((cs)->type) @@ -267,14 +255,16 @@ DECLARE_LRECORD (charset, Lisp_Charset); #define CHARSET_DIMENSION(cs) ((cs)->dimension) #define CHARSET_CHARS(cs) ((cs)->chars) #define CHARSET_REVERSE_DIRECTION_CHARSET(cs) ((cs)->reverse_direction_charset) +#define CHARSET_DECODING_TABLE(cs) ((cs)->decoding_table) +#define CHARSET_UCS_MIN(cs) ((cs)->ucs_min) +#define CHARSET_UCS_MAX(cs) ((cs)->ucs_max) +#define CHARSET_CODE_OFFSET(cs) ((cs)->code_offset) #define XCHARSET_ID(cs) CHARSET_ID (XCHARSET (cs)) #define XCHARSET_NAME(cs) CHARSET_NAME (XCHARSET (cs)) #define XCHARSET_SHORT_NAME(cs) CHARSET_SHORT_NAME (XCHARSET (cs)) #define XCHARSET_LONG_NAME(cs) CHARSET_LONG_NAME (XCHARSET (cs)) -#define XCHARSET_REP_BYTES(cs) CHARSET_REP_BYTES (XCHARSET (cs)) -#define XCHARSET_COLUMNS(cs) CHARSET_COLUMNS (XCHARSET (cs)) #define XCHARSET_GRAPHIC(cs) CHARSET_GRAPHIC (XCHARSET (cs)) #define XCHARSET_TYPE(cs) CHARSET_TYPE (XCHARSET (cs)) #define XCHARSET_DIRECTION(cs) CHARSET_DIRECTION (XCHARSET (cs)) @@ -287,6 +277,7 @@ DECLARE_LRECORD (charset, Lisp_Charset); #define XCHARSET_CHARS(cs) CHARSET_CHARS (XCHARSET (cs)) #define XCHARSET_REVERSE_DIRECTION_CHARSET(cs) \ CHARSET_REVERSE_DIRECTION_CHARSET (XCHARSET (cs)) +#define XCHARSET_DECODING_TABLE(cs) 
CHARSET_DECODING_TABLE(XCHARSET(cs)) struct charset_lookup { /* Table of charsets indexed by (leading byte - MIN_LEADING_BYTE). */ @@ -369,41 +360,19 @@ INLINE_HEADER Emchar MAKE_CHAR (Lisp_Object charset, int c1, int c2); INLINE_HEADER Emchar MAKE_CHAR (Lisp_Object charset, int c1, int c2) { - if (EQ (charset, Vcharset_ascii)) - return c1; - else if (EQ (charset, Vcharset_control_1)) - return c1 | 0x80; - else if (EQ (charset, Vcharset_ucs_bmp)) - return (c1 << 8) | c2; - else if (EQ (charset, Vcharset_latin_iso8859_1)) - return c1 | 0x80; - else if (EQ (charset, Vcharset_latin_iso8859_2)) - return latin_iso8859_2_to_ucs[c1 - 32]; - else if (EQ (charset, Vcharset_latin_iso8859_3)) - return latin_iso8859_3_to_ucs[c1 - 32]; - else if (EQ (charset, Vcharset_latin_iso8859_4)) - return latin_iso8859_4_to_ucs[c1 - 32]; - else if (EQ (charset, Vcharset_cyrillic_iso8859_5)) - return c1 + MIN_CHAR_CYRILLIC - 0x20; - else if (EQ (charset, Vcharset_greek_iso8859_7)) - return c1 + MIN_CHAR_GREEK - 0x20; - else if (EQ (charset, Vcharset_hebrew_iso8859_8)) - return c1 + MIN_CHAR_HEBREW - 0x20; - else if (EQ (charset, Vcharset_latin_iso8859_9)) - return latin_iso8859_9_to_ucs[c1 - 32]; - else if (EQ (charset, Vcharset_thai_tis620)) - return c1 + MIN_CHAR_THAI - 0x20; + Emchar* decoding_table; + + if ((decoding_table = XCHARSET_DECODING_TABLE (charset)) != NULL) + return decoding_table[c1 - (XCHARSET_CHARS (charset) == 94 ? 
33 : 32)]; else if (EQ (charset, Vcharset_katakana_jisx0201)) if (c1 < 0x60) return c1 + MIN_CHAR_HALFWIDTH_KATAKANA - 0x20; else - return 32; - else if (EQ (charset, Vcharset_latin_jisx0201)) - return latin_jisx0201_to_ucs[c1 - 33]; - else if (EQ (charset, Vcharset_latin_viscii_lower)) - return latin_viscii_lower_to_ucs[c1 - 32]; - else if (EQ (charset, Vcharset_latin_viscii_upper)) - return latin_viscii_upper_to_ucs[c1 - 32]; + /* return MIN_CHAR_94 + ('I' - '0') * 94 + (c1 - 33); */ + return ' '; + else if (CHARSET_UCS_MAX (XCHARSET (charset))) + return c1 - CHARSET_CODE_OFFSET (XCHARSET (charset)) + + CHARSET_UCS_MIN (XCHARSET (charset)); else if (XCHARSET_DIMENSION (charset) == 1) { switch (XCHARSET_CHARS (charset)) @@ -436,175 +405,78 @@ MAKE_CHAR (Lisp_Object charset, int c1, int c2) } } -extern Charset_ID latin_a_char_to_charset[128]; -extern unsigned char latin_a_char_to_byte1[128]; -extern unsigned char latin_a_char_to_byte2[128]; +unsigned char charset_get_byte1 (Lisp_Object charset, Emchar ch); +unsigned char charset_get_byte2 (Lisp_Object charset, Emchar ch); + +extern Lisp_Object Vdefault_coded_charset_priority_list; +EXFUN (Ffind_charset, 1); INLINE_HEADER void breakup_char_1 (Emchar c, Lisp_Object *charset, int *c1, int *c2); INLINE_HEADER void breakup_char_1 (Emchar c, Lisp_Object *charset, int *c1, int *c2) { - if (c <= MAX_CHAR_BASIC_LATIN) - { - *charset = Vcharset_ascii; - *c1 = c; - *c2 = 0; - } - else if (c < 0xA0) - { - *charset = Vcharset_control_1; - *c1 = c & 0x7f; - *c2 = 0; - } - else if (c <= 0xff) - { - *charset = Vcharset_latin_iso8859_1; - *c1 = c & 0x7f; - *c2 = 0; - } - else if (c <= 0x17f) - { - *charset - = CHARSET_BY_LEADING_BYTE (latin_a_char_to_charset[c - 0x100]); - - if (EQ (*charset, Vcharset_latin_iso8859_2)) - { - *c1 = get_byte_from_character_table (c, latin_iso8859_2_to_ucs, 96, 32); - *c2 = 0; - } - else if (EQ (*charset, Vcharset_latin_iso8859_3)) - { - *c1 = get_byte_from_character_table (c, latin_iso8859_3_to_ucs, 
96, 32); - *c2 = 0; - } - else if (EQ (*charset, Vcharset_latin_iso8859_4)) - { - *c1 = get_byte_from_character_table (c, latin_iso8859_4_to_ucs, 96, 32); - *c2 = 0; - } - else if (EQ (*charset, Vcharset_latin_iso8859_9)) - { - *c1 = get_byte_from_character_table (c, latin_iso8859_9_to_ucs, 96, 32); - *c2 = 0; - } - else - { - *c1 = latin_a_char_to_byte1[c - 0x100]; - *c2 = latin_a_char_to_byte2[c - 0x100]; - } - } - else if (c < MIN_CHAR_GREEK) + if (c < MIN_CHAR_94) { - if ( (*c1 = get_byte_from_character_table (c, latin_iso8859_2_to_ucs, - 96, 32)) ) + Lisp_Object charsets = Vdefault_coded_charset_priority_list; + while (!EQ (charsets, Qnil)) { - *charset = Vcharset_latin_iso8859_2; - *c2 = 0; + *charset = Ffind_charset (Fcar (charsets)); + if (!EQ (*charset, Qnil) + && (*c1 = charset_get_byte1 (*charset, c)) ) + { + *c2 = charset_get_byte2 (*charset, c); + return; + } + charsets = Fcdr (charsets); } - else if ( (*c1 = - get_byte_from_character_table (c, latin_iso8859_3_to_ucs, - 96, 32)) ) + /* otherwise --- maybe for bootstrap */ + if (c <= MAX_CHAR_BASIC_LATIN) { - *charset = Vcharset_latin_iso8859_3; - *c2 = 0; + *charset = Vcharset_ascii; + *c1 = charset_get_byte1 (*charset, c); + *c2 = charset_get_byte2 (*charset, c); } - else if ( (*c1 = - get_byte_from_character_table (c, latin_iso8859_4_to_ucs, - 96, 32)) ) + else if (c < 0xA0) { - *charset = Vcharset_latin_iso8859_4; - *c2 = 0; + *charset = Vcharset_control_1; + *c1 = charset_get_byte1 (*charset, c); + *c2 = charset_get_byte2 (*charset, c); } - else if ( (*c1 = - get_byte_from_character_table (c, latin_iso8859_9_to_ucs, - 96, 32)) ) + else if (c <= 0xff) { - *charset = Vcharset_latin_iso8859_9; - *c2 = 0; + *charset = Vcharset_latin_iso8859_1; + *c1 = charset_get_byte1 (*charset, c); + *c2 = charset_get_byte2 (*charset, c); } - else if ( (*c1 = - get_byte_from_character_table (c, latin_viscii_lower_to_ucs, - 96, 32)) ) + else if ((MIN_CHAR_GREEK <= c) && (c <= MAX_CHAR_GREEK)) { - *charset = 
Vcharset_latin_viscii_lower; + *charset = Vcharset_greek_iso8859_7; + *c1 = c - MIN_CHAR_GREEK + 0x20; *c2 = 0; } - else if ( (*c1 = - get_byte_from_character_table (c, latin_viscii_upper_to_ucs, - 96, 32)) ) + else if ((MIN_CHAR_CYRILLIC <= c) && (c <= MAX_CHAR_CYRILLIC)) { - *charset = Vcharset_latin_viscii_upper; + *charset = Vcharset_cyrillic_iso8859_5; + *c1 = c - MIN_CHAR_CYRILLIC + 0x20; *c2 = 0; } - else + else if ((MIN_CHAR_HEBREW <= c) && (c <= MAX_CHAR_HEBREW)) { - *charset = Vcharset_ucs_bmp; - *c1 = c >> 8; - *c2 = c & 0xff; - } - } - else if (c <= MAX_CHAR_GREEK) - { - *charset = Vcharset_greek_iso8859_7; - *c1 = c - MIN_CHAR_GREEK + 0x20; - *c2 = 0; - } - else if (c < MIN_CHAR_CYRILLIC) - { - *charset = Vcharset_ucs_bmp; - *c1 = c >> 8; - *c2 = c & 0xff; - } - else if (c <= MAX_CHAR_CYRILLIC) - { - *charset = Vcharset_cyrillic_iso8859_5; - *c1 = c - MIN_CHAR_CYRILLIC + 0x20; - *c2 = 0; - } - else if (c < MIN_CHAR_HEBREW) - { - *charset = Vcharset_ucs_bmp; - *c1 = c >> 8; - *c2 = c & 0xff; - } - else if (c <= MAX_CHAR_HEBREW) - { - *charset = Vcharset_hebrew_iso8859_8; - *c1 = c - MIN_CHAR_HEBREW + 0x20; - *c2 = 0; - } - else if (c < MIN_CHAR_THAI) - { - *charset = Vcharset_ucs_bmp; - *c1 = c >> 8; - *c2 = c & 0xff; - } - else if (c <= MAX_CHAR_THAI) - { - *charset = Vcharset_thai_tis620; - *c1 = c - MIN_CHAR_THAI + 0x20; - *c2 = 0; - } - else if (c < MIN_CHAR_HALFWIDTH_KATAKANA) - { - if ( (*c1 = get_byte_from_character_table (c, latin_jisx0201_to_ucs, - 94, 33)) ) - { - *charset = Vcharset_latin_jisx0201; + *charset = Vcharset_hebrew_iso8859_8; + *c1 = c - MIN_CHAR_HEBREW + 0x20; *c2 = 0; } - else if ( (*c1 = get_byte_from_character_table (c, - latin_viscii_lower_to_ucs, - 96, 32)) ) + else if ((MIN_CHAR_THAI <= c) && (c <= MAX_CHAR_THAI)) { - *charset = Vcharset_latin_viscii_lower; + *charset = Vcharset_thai_tis620; + *c1 = c - MIN_CHAR_THAI + 0x20; *c2 = 0; } - else if ( (*c1 = get_byte_from_character_table (c, - latin_viscii_upper_to_ucs, - 96, 
32)) ) + else if ((MIN_CHAR_HALFWIDTH_KATAKANA <= c) + && (c <= MAX_CHAR_HALFWIDTH_KATAKANA)) { - *charset = Vcharset_latin_viscii_upper; + *charset = Vcharset_katakana_jisx0201; + *c1 = c - MIN_CHAR_HALFWIDTH_KATAKANA + 0x20; *c2 = 0; } else @@ -614,12 +486,6 @@ breakup_char_1 (Emchar c, Lisp_Object *charset, int *c1, int *c2) *c2 = c & 0xff; } } - else if (c <= MAX_CHAR_HALFWIDTH_KATAKANA) - { - *charset = Vcharset_katakana_jisx0201; - *c1 = c - MIN_CHAR_HALFWIDTH_KATAKANA + 0x20; - *c2 = 0; - } else if (c <= MAX_CHAR_94) { *charset @@ -679,22 +545,13 @@ CHAR_CHARSET (Emchar ch) #define CHAR_LEADING_BYTE(c) (XCHARSET_LEADING_BYTE(CHAR_CHARSET(c))) - -#ifdef ENABLE_COMPOSITE_CHARS -/************************************************************************/ -/* Composite characters */ -/************************************************************************/ - -Emchar lookup_composite_char (Bufbyte *str, int len); -Lisp_Object composite_char_string (Emchar ch); -#endif /* ENABLE_COMPOSITE_CHARS */ +#define CHAR_COLUMNS(c) (CHARSET_COLUMNS(XCHARSET(CHAR_CHARSET(c)))) /************************************************************************/ /* Exported functions */ /************************************************************************/ -EXFUN (Ffind_charset, 1); EXFUN (Fget_charset, 1); extern Lisp_Object Vcharset_chinese_big5_1; diff --git a/src/depend b/src/depend index 96f333d..1121482 100644 --- a/src/depend +++ b/src/depend @@ -216,6 +216,7 @@ sysdll.o: config.h sysdll.h termcap.o: $(LISP_H) conslots.h console.h device.h terminfo.o: config.h tests.o: $(LISP_H) buffer.h bufslots.h casetab.h char-1byte.h char-lb.h char-ucs.h character.h chartab.h lstream.h mb-1byte.h mb-lb.h mb-multibyte.h mb-utf-8.h mule-charset.h multibyte.h opaque.h +text-coding.o: $(LISP_H) buffer.h bufslots.h casetab.h char-1byte.h char-lb.h char-ucs.h character.h chartab.h elhash.h file-coding.h insdel.h lstream.h mb-1byte.h mb-lb.h mb-multibyte.h mb-utf-8.h mule-ccl.h mule-charset.h 
multibyte.h opaque.h toolbar.o: $(LISP_H) buffer.h bufslots.h casetab.h char-1byte.h char-lb.h char-ucs.h character.h chartab.h conslots.h console.h device.h frame.h frameslots.h glyphs.h gui.h mb-1byte.h mb-lb.h mb-multibyte.h mb-utf-8.h mule-charset.h multibyte.h redisplay.h scrollbar.h specifier.h toolbar.h window.h winslots.h tooltalk.o: $(LISP_H) buffer.h bufslots.h casetab.h char-1byte.h char-lb.h char-ucs.h character.h chartab.h elhash.h mb-1byte.h mb-lb.h mb-multibyte.h mb-utf-8.h mule-charset.h multibyte.h process.h syssignal.h tooltalk.h tparam.o: config.h diff --git a/src/file-coding.c b/src/file-coding.c index 9774267..fdf4a31 100644 --- a/src/file-coding.c +++ b/src/file-coding.c @@ -892,10 +892,6 @@ if TYPE is 'ccl: CHECK_STRING (doc_string); CODING_SYSTEM_DOC_STRING (codesys) = doc_string; -#ifdef UTF2000 - if (ty == CODESYS_NO_CONVERSION) - codesys->fixed.size = 1; -#endif { EXTERNAL_PROPERTY_LIST_LOOP_3 (key, value, props) { @@ -5290,9 +5286,23 @@ decode_coding_iso2022 (Lstream *decoding, const Extbyte *src, charset = new_charset; } -#ifndef UTF2000 +#ifdef UTF2000 + if (XCHARSET_DIMENSION (charset) == 1) + { + DECODE_OUTPUT_PARTIAL_CHAR (ch); + DECODE_ADD_UCS_CHAR + (MAKE_CHAR (charset, c & 0x7F, 0), dst); + } + else if (ch) + { + DECODE_ADD_UCS_CHAR + (MAKE_CHAR (charset, ch & 0x7F, c & 0x7F), dst); + ch = 0; + } + else + ch = c; +#else lb = XCHARSET_LEADING_BYTE (charset); -#endif switch (XCHARSET_REP_BYTES (charset)) { case 1: /* ASCII */ @@ -5302,44 +5312,25 @@ decode_coding_iso2022 (Lstream *decoding, const Extbyte *src, case 2: /* one-byte official */ DECODE_OUTPUT_PARTIAL_CHAR (ch); -#ifdef UTF2000 - DECODE_ADD_UCS_CHAR(MAKE_CHAR(charset, c & 0x7F, 0), dst); -#else Dynarr_add (dst, lb); Dynarr_add (dst, c | 0x80); -#endif break; case 3: /* one-byte private or two-byte official */ -#ifdef UTF2000 - if (XCHARSET_DIMENSION (charset) == 1) -#else if (XCHARSET_PRIVATE_P (charset)) -#endif { DECODE_OUTPUT_PARTIAL_CHAR (ch); -#ifdef UTF2000 - 
DECODE_ADD_UCS_CHAR(MAKE_CHAR(charset, c & 0x7F, 0), - dst); -#else Dynarr_add (dst, PRE_LEADING_BYTE_PRIVATE_1); Dynarr_add (dst, lb); Dynarr_add (dst, c | 0x80); -#endif } else { if (ch) { -#ifdef UTF2000 - DECODE_ADD_UCS_CHAR(MAKE_CHAR(charset, - ch & 0x7F, - c & 0x7F), dst); -#else Dynarr_add (dst, lb); Dynarr_add (dst, ch | 0x80); Dynarr_add (dst, c | 0x80); -#endif ch = 0; } else @@ -5350,21 +5341,16 @@ decode_coding_iso2022 (Lstream *decoding, const Extbyte *src, default: /* two-byte private */ if (ch) { -#ifdef UTF2000 - DECODE_ADD_UCS_CHAR(MAKE_CHAR(charset, - ch & 0x7F, - c & 0x7F), dst); -#else Dynarr_add (dst, PRE_LEADING_BYTE_PRIVATE_2); Dynarr_add (dst, lb); Dynarr_add (dst, ch | 0x80); Dynarr_add (dst, c | 0x80); -#endif ch = 0; } else ch = c; } +#endif } if (!ch) @@ -6088,31 +6074,7 @@ encode_coding_no_conversion (Lstream *encoding, const Bufbyte *src, break; case 1: ch = ( ch << 6 ) | ( c & 0x3f ); - switch ( str->codesys->fixed.size ) - { - case 1: - Dynarr_add (dst, ch & 0xff); - break; - case 2: - Dynarr_add (dst, (ch >> 8) & 0xff); - Dynarr_add (dst, ch & 0xff); - break; - case 3: - Dynarr_add (dst, (ch >> 16) & 0xff); - Dynarr_add (dst, (ch >> 8) & 0xff); - Dynarr_add (dst, ch & 0xff); - break; - case 4: - Dynarr_add (dst, (ch >> 24) & 0xff); - Dynarr_add (dst, (ch >> 16) & 0xff); - Dynarr_add (dst, (ch >> 8) & 0xff); - Dynarr_add (dst, ch & 0xff); - break; - default: - fprintf(stderr, "It seems %d bytes stream.\n", - str->codesys->fixed.size); - abort (); - } + Dynarr_add (dst, ch & 0xff); char_boundary = 0; break; default: diff --git a/src/file-coding.h b/src/file-coding.h index 895093a..322dbec 100644 --- a/src/file-coding.h +++ b/src/file-coding.h @@ -132,10 +132,6 @@ struct Lisp_Coding_System } iso2022; struct { - unsigned char size; - } fixed; - struct - { /* For a CCL coding system, these specify the CCL programs used for decoding (input) and encoding (output). 
*/ Lisp_Object decode; diff --git a/src/indent.c b/src/indent.c index 0ef743f..9ecf75b 100644 --- a/src/indent.c +++ b/src/indent.c @@ -169,7 +169,7 @@ column_at_point (struct buffer *buf, Bufpos init_pos, int cur_col) + displayed_glyphs->end_columns)); #else /* XEmacs */ #ifdef MULE - col += XCHARSET_COLUMNS (CHAR_CHARSET (c)); + col += CHAR_COLUMNS (c); #else col ++; #endif /* MULE */ @@ -226,7 +226,7 @@ string_column_at_point (Lisp_String* s, Bufpos init_pos, int tab_width) break; else #ifdef MULE - col += XCHARSET_COLUMNS (CHAR_CHARSET (c)); + col += CHAR_COLUMNS (c); #else col ++; #endif /* MULE */ @@ -456,7 +456,7 @@ Returns the actual column that it moved to. + displayed_glyphs->end_columns)); #else /* XEmacs */ #ifdef MULE - col += XCHARSET_COLUMNS (CHAR_CHARSET (c)); + col += CHAR_COLUMNS (c); #else col ++; #endif /* MULE */ diff --git a/src/insdel.c b/src/insdel.c index 2530222..046697c 100644 --- a/src/insdel.c +++ b/src/insdel.c @@ -3162,7 +3162,7 @@ bufbyte_string_displayed_columns (const Bufbyte *str, Bytecount len) { #ifdef MULE Emchar ch = charptr_emchar (str); - cols += XCHARSET_COLUMNS (CHAR_CHARSET (ch)); + cols += CHAR_COLUMNS (ch); #else cols++; #endif @@ -3180,7 +3180,7 @@ emchar_string_displayed_columns (const Emchar *str, Charcount len) int i; for (i = 0; i < len; i++) - cols += XCHARSET_COLUMNS (CHAR_CHARSET (str[i])); + cols += CHAR_COLUMNS (str[i]); return cols; #else /* not MULE */ diff --git a/src/mule-charset.c b/src/mule-charset.c index c0fe875..69278bf 100644 --- a/src/mule-charset.c +++ b/src/mule-charset.c @@ -135,18 +135,25 @@ const Bytecount rep_bytes_by_first_byte[0xA0] = #endif #ifdef UTF2000 -unsigned char -get_byte_from_character_table (Emchar ch, - Emchar* table, size_t size, unsigned char offset) +int +get_byte_from_character_table (Emchar ch, Lisp_Object ccs) { - size_t i; + Lisp_Charset* cs = XCHARSET(ccs); - for (i = 0; i < size; i++) + if (CHARSET_DIMENSION (cs) == 1) { - if (table[i] == ch) - return i + offset; + 
Emchar* table = CHARSET_DECODING_TABLE (cs); + size_t size = CHARSET_CHARS (cs); + unsigned char offset = CHARSET_CODE_OFFSET (cs); + size_t i; + + for (i = 0; i < size; i++) + { + if (table[i] == ch) + return i + offset; + } } - return 0; + return -1; } #define CHAR96(ft,b) (MIN_CHAR_96 + (ft - '0') * 96 + (b & 0x7f) - 32) @@ -249,7 +256,6 @@ Emchar latin_jisx0201_to_ucs[94] = 0x203E /* 0x7E OVERLINE */ }; - Emchar latin_iso8859_2_to_ucs[96] = { 0x00A0 /* 0xA0 NO-BREAK SPACE */, @@ -850,7 +856,6 @@ Emchar latin_viscii_upper_to_ucs[96] = CHAR96('2', 0x7f) }; - Emchar latin_tcvn5712_to_ucs[96] = { 0x00A0 /* 0xA0 NO-BREAK SPACE */, @@ -951,399 +956,6 @@ Emchar latin_tcvn5712_to_ucs[96] = 0x1ED0 /* 0xFF LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND ACUTE */ }; -Charset_ID latin_a_char_to_charset[128] = { - /* U+0100 */ LEADING_BYTE_LATIN_ISO8859_4, - /* U+0101 */ LEADING_BYTE_LATIN_ISO8859_4, - /* U+0102 */ LEADING_BYTE_LATIN_ISO8859_2, - /* U+0103 */ LEADING_BYTE_LATIN_ISO8859_2, - /* U+0104 */ LEADING_BYTE_LATIN_ISO8859_2, - /* U+0105 */ LEADING_BYTE_LATIN_ISO8859_2, - /* U+0106 */ LEADING_BYTE_LATIN_ISO8859_2, - /* U+0107 */ LEADING_BYTE_LATIN_ISO8859_2, - /* U+0108 */ LEADING_BYTE_LATIN_ISO8859_3, - /* U+0109 */ LEADING_BYTE_LATIN_ISO8859_3, - /* U+010A */ LEADING_BYTE_LATIN_ISO8859_3, - /* U+010B */ LEADING_BYTE_LATIN_ISO8859_3, - /* U+010C */ LEADING_BYTE_LATIN_ISO8859_2, - /* U+010D */ LEADING_BYTE_LATIN_ISO8859_2, - /* U+010E */ LEADING_BYTE_LATIN_ISO8859_2, - /* U+010F */ LEADING_BYTE_LATIN_ISO8859_2, - /* U+0110 */ LEADING_BYTE_LATIN_ISO8859_2, - /* U+0111 */ LEADING_BYTE_LATIN_ISO8859_2, - /* U+0112 */ LEADING_BYTE_LATIN_ISO8859_4, - /* U+0113 */ LEADING_BYTE_LATIN_ISO8859_4, - /* U+0114 */ LEADING_BYTE_UCS_BMP, - /* U+0115 */ LEADING_BYTE_UCS_BMP, - /* U+0116 */ LEADING_BYTE_LATIN_ISO8859_4, - /* U+0117 */ LEADING_BYTE_LATIN_ISO8859_4, - /* U+0118 */ LEADING_BYTE_LATIN_ISO8859_2, - /* U+0119 */ LEADING_BYTE_LATIN_ISO8859_2, - /* U+011A */ 
LEADING_BYTE_LATIN_ISO8859_2, - /* U+011B */ LEADING_BYTE_LATIN_ISO8859_2, - /* U+011C */ LEADING_BYTE_LATIN_ISO8859_3, - /* U+011D */ LEADING_BYTE_LATIN_ISO8859_3, - /* U+011E */ LEADING_BYTE_LATIN_ISO8859_3, - /* U+011F */ LEADING_BYTE_LATIN_ISO8859_3, - /* U+0120 */ LEADING_BYTE_LATIN_ISO8859_3, - /* U+0121 */ LEADING_BYTE_LATIN_ISO8859_3, - /* U+0122 */ LEADING_BYTE_LATIN_ISO8859_4, - /* U+0123 */ LEADING_BYTE_LATIN_ISO8859_4, - /* U+0124 */ LEADING_BYTE_LATIN_ISO8859_3, - /* U+0125 */ LEADING_BYTE_LATIN_ISO8859_3, - /* U+0126 */ LEADING_BYTE_LATIN_ISO8859_3, - /* U+0127 */ LEADING_BYTE_LATIN_ISO8859_3, - /* U+0128 */ LEADING_BYTE_LATIN_ISO8859_4, - /* U+0129 */ LEADING_BYTE_LATIN_ISO8859_4, - /* U+012A */ LEADING_BYTE_LATIN_ISO8859_4, - /* U+012B */ LEADING_BYTE_LATIN_ISO8859_4, - /* U+012C */ LEADING_BYTE_UCS_BMP, - /* U+012D */ LEADING_BYTE_UCS_BMP, - /* U+012E */ LEADING_BYTE_LATIN_ISO8859_4, - /* U+012F */ LEADING_BYTE_LATIN_ISO8859_4, - /* U+0130 */ LEADING_BYTE_LATIN_ISO8859_3, - /* U+0131 */ LEADING_BYTE_LATIN_ISO8859_3, - /* U+0132 */ LEADING_BYTE_JAPANESE_JISX0212, - /* U+0133 */ LEADING_BYTE_JAPANESE_JISX0212, - /* U+0134 */ LEADING_BYTE_LATIN_ISO8859_3, - /* U+0135 */ LEADING_BYTE_LATIN_ISO8859_3, - /* U+0136 */ LEADING_BYTE_LATIN_ISO8859_4, - /* U+0137 */ LEADING_BYTE_LATIN_ISO8859_4, - /* U+0138 */ LEADING_BYTE_LATIN_ISO8859_4, - /* U+0139 */ LEADING_BYTE_LATIN_ISO8859_2, - /* U+013A */ LEADING_BYTE_LATIN_ISO8859_2, - /* U+013B */ LEADING_BYTE_LATIN_ISO8859_4, - /* U+013C */ LEADING_BYTE_LATIN_ISO8859_4, - /* U+013D */ LEADING_BYTE_LATIN_ISO8859_2, - /* U+013E */ LEADING_BYTE_LATIN_ISO8859_2, - /* U+013F */ LEADING_BYTE_JAPANESE_JISX0212, - /* U+0140 */ LEADING_BYTE_JAPANESE_JISX0212, - /* U+0141 */ LEADING_BYTE_LATIN_ISO8859_2, - /* U+0142 */ LEADING_BYTE_LATIN_ISO8859_2, - /* U+0143 */ LEADING_BYTE_LATIN_ISO8859_2, - /* U+0144 */ LEADING_BYTE_LATIN_ISO8859_2, - /* U+0145 */ LEADING_BYTE_LATIN_ISO8859_4, - /* U+0146 */ 
LEADING_BYTE_LATIN_ISO8859_4, - /* U+0147 */ LEADING_BYTE_LATIN_ISO8859_2, - /* U+0148 */ LEADING_BYTE_LATIN_ISO8859_2, - /* U+0149 */ LEADING_BYTE_JAPANESE_JISX0212, - /* U+014A */ LEADING_BYTE_LATIN_ISO8859_4, - /* U+014B */ LEADING_BYTE_LATIN_ISO8859_4, - /* U+014C */ LEADING_BYTE_LATIN_ISO8859_4, - /* U+014D */ LEADING_BYTE_LATIN_ISO8859_4, - /* U+014E */ LEADING_BYTE_UCS_BMP, - /* U+014F */ LEADING_BYTE_UCS_BMP, - /* U+0150 */ LEADING_BYTE_LATIN_ISO8859_2, - /* U+0151 */ LEADING_BYTE_LATIN_ISO8859_2, - /* U+0152 */ LEADING_BYTE_JAPANESE_JISX0212, - /* U+0153 */ LEADING_BYTE_JAPANESE_JISX0212, - /* U+0154 */ LEADING_BYTE_LATIN_ISO8859_2, - /* U+0155 */ LEADING_BYTE_LATIN_ISO8859_2, - /* U+0156 */ LEADING_BYTE_LATIN_ISO8859_4, - /* U+0157 */ LEADING_BYTE_LATIN_ISO8859_4, - /* U+0158 */ LEADING_BYTE_LATIN_ISO8859_2, - /* U+0159 */ LEADING_BYTE_LATIN_ISO8859_2, - /* U+015A */ LEADING_BYTE_LATIN_ISO8859_2, - /* U+015B */ LEADING_BYTE_LATIN_ISO8859_2, - /* U+015C */ LEADING_BYTE_LATIN_ISO8859_3, - /* U+015D */ LEADING_BYTE_LATIN_ISO8859_3, - /* U+015E */ LEADING_BYTE_LATIN_ISO8859_2, - /* U+015F */ LEADING_BYTE_LATIN_ISO8859_2, - /* U+0160 */ LEADING_BYTE_LATIN_ISO8859_2, - /* U+0161 */ LEADING_BYTE_LATIN_ISO8859_2, - /* U+0162 */ LEADING_BYTE_LATIN_ISO8859_2, - /* U+0163 */ LEADING_BYTE_LATIN_ISO8859_2, - /* U+0164 */ LEADING_BYTE_LATIN_ISO8859_2, - /* U+0165 */ LEADING_BYTE_LATIN_ISO8859_2, - /* U+0166 */ LEADING_BYTE_LATIN_ISO8859_4, - /* U+0167 */ LEADING_BYTE_LATIN_ISO8859_4, - /* U+0168 */ LEADING_BYTE_LATIN_ISO8859_4, - /* U+0169 */ LEADING_BYTE_LATIN_ISO8859_4, - /* U+016A */ LEADING_BYTE_LATIN_ISO8859_4, - /* U+016B */ LEADING_BYTE_LATIN_ISO8859_4, - /* U+016C */ LEADING_BYTE_LATIN_ISO8859_3, - /* U+016D */ LEADING_BYTE_LATIN_ISO8859_3, - /* U+016E */ LEADING_BYTE_LATIN_ISO8859_2, - /* U+016F */ LEADING_BYTE_LATIN_ISO8859_2, - /* U+0170 */ LEADING_BYTE_LATIN_ISO8859_2, - /* U+0171 */ LEADING_BYTE_LATIN_ISO8859_2, - /* U+0172 */ LEADING_BYTE_LATIN_ISO8859_4, 
- /* U+0173 */ LEADING_BYTE_LATIN_ISO8859_4, - /* U+0174 */ LEADING_BYTE_JAPANESE_JISX0212, - /* U+0175 */ LEADING_BYTE_JAPANESE_JISX0212, - /* U+0176 */ LEADING_BYTE_JAPANESE_JISX0212, - /* U+0177 */ LEADING_BYTE_JAPANESE_JISX0212, - /* U+0178 */ LEADING_BYTE_JAPANESE_JISX0212, - /* U+0179 */ LEADING_BYTE_LATIN_ISO8859_2, - /* U+017A */ LEADING_BYTE_LATIN_ISO8859_2, - /* U+017B */ LEADING_BYTE_LATIN_ISO8859_2, - /* U+017C */ LEADING_BYTE_LATIN_ISO8859_2, - /* U+017D */ LEADING_BYTE_LATIN_ISO8859_2, - /* U+017E */ LEADING_BYTE_LATIN_ISO8859_2, - /* U+017F */ LEADING_BYTE_UCS_BMP -}; - -unsigned char latin_a_char_to_byte1[128] = { - /* U+0100 */ 0xC0 - 0x80, - /* U+0101 */ 0xE0 - 0x80, - /* U+0102 */ 0xC3 - 0x80, - /* U+0103 */ 0xE3 - 0x80, - /* U+0104 */ 0xA1 - 0x80, - /* U+0105 */ 0xB1 - 0x80, - /* U+0106 */ 0xC6 - 0x80, - /* U+0107 */ 0xE6 - 0x80, - /* U+0108 */ 0xC6 - 0x80, - /* U+0109 */ 0xE6 - 0x80, - /* U+010A */ 0xC5 - 0x80, - /* U+010B */ 0xE5 - 0x80, - /* U+010C */ 0xC8 - 0x80, - /* U+010D */ 0xE8 - 0x80, - /* U+010E */ 0xCF - 0x80, - /* U+010F */ 0xEF - 0x80, - /* U+0110 */ 0xD0 - 0x80, - /* U+0111 */ 0xF0 - 0x80, - /* U+0112 */ 0xAA - 0x80, - /* U+0113 */ 0xBA - 0x80, - /* U+0114 */ 0x01, - /* U+0115 */ 0x01, - /* U+0116 */ 0xCC - 0x80, - /* U+0117 */ 0xEC - 0x80, - /* U+0118 */ 0xCA - 0x80, - /* U+0119 */ 0xEA - 0x80, - /* U+011A */ 0xCC - 0x80, - /* U+011B */ 0xEC - 0x80, - /* U+011C */ 0xD8 - 0x80, - /* U+011D */ 0xF8 - 0x80, - /* U+011E */ 0xAB - 0x80, - /* U+011F */ 0xBB - 0x80, - /* U+0120 */ 0xD5 - 0x80, - /* U+0121 */ 0xF5 - 0x80, - /* U+0122 */ 0xAB - 0x80, - /* U+0123 */ 0xBB - 0x80, - /* U+0124 */ 0xA6 - 0x80, - /* U+0125 */ 0xB6 - 0x80, - /* U+0126 */ 0xA1 - 0x80, - /* U+0127 */ 0xB1 - 0x80, - /* U+0128 */ 0xA5 - 0x80, - /* U+0129 */ 0xB5 - 0x80, - /* U+012A */ 0xCF - 0x80, - /* U+012B */ 0xEF - 0x80, - /* U+012C */ 0x01, - /* U+012D */ 0x01, - /* U+012E */ 0xC7 - 0x80, - /* U+012F */ 0xE7 - 0x80, - /* U+0130 */ 0xA9 - 0x80, - /* U+0131 */ 
0xB9 - 0x80, - /* U+0132 */ 0x29, - /* U+0133 */ 0x29, - /* U+0134 */ 0xAC - 0x80, - /* U+0135 */ 0xBC - 0x80, - /* U+0136 */ 0xD3 - 0x80, - /* U+0137 */ 0xF3 - 0x80, - /* U+0138 */ 0xA2 - 0x80, - /* U+0139 */ 0xC5 - 0x80, - /* U+013A */ 0xE5 - 0x80, - /* U+013B */ 0xA6 - 0x80, - /* U+013C */ 0xB6 - 0x80, - /* U+013D */ 0xA5 - 0x80, - /* U+013E */ 0xB5 - 0x80, - /* U+013F */ 0x29, - /* U+0140 */ 0x29, - /* U+0141 */ 0xA3 - 0x80, - /* U+0142 */ 0xB3 - 0x80, - /* U+0143 */ 0xD1 - 0x80, - /* U+0144 */ 0xF1 - 0x80, - /* U+0145 */ 0xD1 - 0x80, - /* U+0146 */ 0xF1 - 0x80, - /* U+0147 */ 0xD2 - 0x80, - /* U+0148 */ 0xF2 - 0x80, - /* U+0149 */ 0x29, - /* U+014A */ 0xBD - 0x80, - /* U+014B */ 0xBF - 0x80, - /* U+014C */ 0xD2 - 0x80, - /* U+014D */ 0xF2 - 0x80, - /* U+014E */ 0x01, - /* U+014F */ 0x01, - /* U+0150 */ 0xD5 - 0x80, - /* U+0151 */ 0xF5 - 0x80, - /* U+0152 */ 0x29, - /* U+0153 */ 0x29, - /* U+0154 */ 0xC0 - 0x80, - /* U+0155 */ 0xE0 - 0x80, - /* U+0156 */ 0xA3 - 0x80, - /* U+0157 */ 0xB3 - 0x80, - /* U+0158 */ 0xD8 - 0x80, - /* U+0159 */ 0xF8 - 0x80, - /* U+015A */ 0xA6 - 0x80, - /* U+015B */ 0xB6 - 0x80, - /* U+015C */ 0xDE - 0x80, - /* U+015D */ 0xFE - 0x80, - /* U+015E */ 0xAA - 0x80, - /* U+015F */ 0xBA - 0x80, - /* U+0160 */ 0xA9 - 0x80, - /* U+0161 */ 0xB9 - 0x80, - /* U+0162 */ 0xDE - 0x80, - /* U+0163 */ 0xFE - 0x80, - /* U+0164 */ 0xAB - 0x80, - /* U+0165 */ 0xBB - 0x80, - /* U+0166 */ 0xAC - 0x80, - /* U+0167 */ 0xBC - 0x80, - /* U+0168 */ 0xDD - 0x80, - /* U+0169 */ 0xFD - 0x80, - /* U+016A */ 0xDE - 0x80, - /* U+016B */ 0xFE - 0x80, - /* U+016C */ 0xDD - 0x80, - /* U+016D */ 0xFD - 0x80, - /* U+016E */ 0xD9 - 0x80, - /* U+016F */ 0xF9 - 0x80, - /* U+0170 */ 0xDB - 0x80, - /* U+0171 */ 0xFB - 0x80, - /* U+0172 */ 0xD9 - 0x80, - /* U+0173 */ 0xF9 - 0x80, - /* U+0174 */ 0x2A, - /* U+0175 */ 0x2B, - /* U+0176 */ 0x2A, - /* U+0177 */ 0x2B, - /* U+0178 */ 0x2A, - /* U+0179 */ 0xAC - 0x80, - /* U+017A */ 0xBC - 0x80, - /* U+017B */ 0xAF - 0x80, - /* U+017C 
*/ 0xBF - 0x80, - /* U+017D */ 0xAE - 0x80, - /* U+017E */ 0xBE - 0x80, - /* U+017F */ 0x01 -}; - -unsigned char latin_a_char_to_byte2[128] = { - /* U+0100 */ 0x00, - /* U+0101 */ 0x00, - /* U+0102 */ 0x00, - /* U+0103 */ 0x00, - /* U+0104 */ 0x00, - /* U+0105 */ 0x00, - /* U+0106 */ 0x00, - /* U+0107 */ 0x00, - /* U+0108 */ 0x00, - /* U+0109 */ 0x00, - /* U+010A */ 0x00, - /* U+010B */ 0x00, - /* U+010C */ 0x00, - /* U+010D */ 0x00, - /* U+010E */ 0x00, - /* U+010F */ 0x00, - /* U+0110 */ 0x00, - /* U+0111 */ 0x00, - /* U+0112 */ 0x00, - /* U+0113 */ 0x00, - /* U+0114 */ 0x14, - /* U+0115 */ 0x15, - /* U+0116 */ 0x00, - /* U+0117 */ 0x00, - /* U+0118 */ 0x00, - /* U+0119 */ 0x00, - /* U+011A */ 0x00, - /* U+011B */ 0x00, - /* U+011C */ 0x00, - /* U+011D */ 0x00, - /* U+011E */ 0x00, - /* U+011F */ 0x00, - /* U+0120 */ 0x00, - /* U+0121 */ 0x00, - /* U+0122 */ 0x00, - /* U+0123 */ 0x00, - /* U+0124 */ 0x00, - /* U+0125 */ 0x00, - /* U+0126 */ 0x00, - /* U+0127 */ 0x00, - /* U+0128 */ 0x00, - /* U+0129 */ 0x00, - /* U+012A */ 0x00, - /* U+012B */ 0x00, - /* U+012C */ 0x2C, - /* U+012D */ 0x2D, - /* U+012E */ 0x00, - /* U+012F */ 0x00, - /* U+0130 */ 0x00, - /* U+0131 */ 0x00, - /* U+0132 */ 0x26, - /* U+0133 */ 0x46, - /* U+0134 */ 0x00, - /* U+0135 */ 0x00, - /* U+0136 */ 0x00, - /* U+0137 */ 0x00, - /* U+0138 */ 0x00, - /* U+0139 */ 0x00, - /* U+013A */ 0x00, - /* U+013B */ 0x00, - /* U+013C */ 0x00, - /* U+013D */ 0x00, - /* U+013E */ 0x00, - /* U+013F */ 0x29, - /* U+0140 */ 0x49, - /* U+0141 */ 0x00, - /* U+0142 */ 0x00, - /* U+0143 */ 0x00, - /* U+0144 */ 0x00, - /* U+0145 */ 0x00, - /* U+0146 */ 0x00, - /* U+0147 */ 0x00, - /* U+0148 */ 0x00, - /* U+0149 */ 0x4A, - /* U+014A */ 0x00, - /* U+014B */ 0x00, - /* U+014C */ 0x00, - /* U+014D */ 0x00, - /* U+014E */ 0x4E, - /* U+014F */ 0x4F, - /* U+0150 */ 0x00, - /* U+0151 */ 0x00, - /* U+0152 */ 0x2D, - /* U+0153 */ 0x4D, - /* U+0154 */ 0x00, - /* U+0155 */ 0x00, - /* U+0156 */ 0x00, - /* U+0157 */ 0x00, - /* 
U+0158 */ 0x00, - /* U+0159 */ 0x00, - /* U+015A */ 0x00, - /* U+015B */ 0x00, - /* U+015C */ 0x00, - /* U+015D */ 0x00, - /* U+015E */ 0x00, - /* U+015F */ 0x00, - /* U+0160 */ 0x00, - /* U+0161 */ 0x00, - /* U+0162 */ 0x00, - /* U+0163 */ 0x00, - /* U+0164 */ 0x00, - /* U+0165 */ 0x00, - /* U+0166 */ 0x00, - /* U+0167 */ 0x00, - /* U+0168 */ 0x00, - /* U+0169 */ 0x00, - /* U+016A */ 0x00, - /* U+016B */ 0x00, - /* U+016C */ 0x00, - /* U+016D */ 0x00, - /* U+016E */ 0x00, - /* U+016F */ 0x00, - /* U+0170 */ 0x00, - /* U+0171 */ 0x00, - /* U+0172 */ 0x00, - /* U+0173 */ 0x00, - /* U+0174 */ 0x71, - /* U+0175 */ 0x71, - /* U+0176 */ 0x74, - /* U+0177 */ 0x74, - /* U+0178 */ 0x73, - /* U+0179 */ 0x00, - /* U+017A */ 0x00, - /* U+017B */ 0x00, - /* U+017C */ 0x00, - /* U+017D */ 0x00, - /* U+017E */ 0x00, - /* U+017F */ 0x7F -}; - Lisp_Object Vutf_2000_version; #endif @@ -1781,17 +1393,20 @@ static const struct lrecord_description charset_description[] = { }; DEFINE_LRECORD_IMPLEMENTATION ("charset", charset, - mark_charset, print_charset, 0, 0, 0, charset_description, + mark_charset, print_charset, 0, 0, 0, + charset_description, Lisp_Charset); /* Make a new charset. */ /* #### SJT Should generic properties be allowed? 
*/ static Lisp_Object -make_charset (Charset_ID id, Lisp_Object name, unsigned char rep_bytes, +make_charset (Charset_ID id, Lisp_Object name, unsigned char type, unsigned char columns, unsigned char graphic, - Bufbyte final, unsigned char direction, Lisp_Object short_name, + Bufbyte final, unsigned char direction, Lisp_Object short_name, Lisp_Object long_name, Lisp_Object doc, - Lisp_Object reg) + Lisp_Object reg, + Emchar* decoding_table, + Emchar ucs_min, Emchar ucs_max, Emchar code_offset) { Lisp_Object obj; Lisp_Charset *cs = alloc_lcrecord_type (Lisp_Charset, &lrecord_charset); @@ -1804,7 +1419,6 @@ make_charset (Charset_ID id, Lisp_Object name, unsigned char rep_bytes, CHARSET_NAME (cs) = name; CHARSET_SHORT_NAME (cs) = short_name; CHARSET_LONG_NAME (cs) = long_name; - CHARSET_REP_BYTES (cs) = rep_bytes; CHARSET_DIRECTION (cs) = direction; CHARSET_TYPE (cs) = type; CHARSET_COLUMNS (cs) = columns; @@ -1814,7 +1428,13 @@ make_charset (Charset_ID id, Lisp_Object name, unsigned char rep_bytes, CHARSET_REGISTRY (cs) = reg; CHARSET_CCL_PROGRAM (cs) = Qnil; CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil; - +#ifdef UTF2000 + CHARSET_DECODING_TABLE(cs) = decoding_table; + CHARSET_UCS_MIN(cs) = ucs_min; + CHARSET_UCS_MAX(cs) = ucs_max; + CHARSET_CODE_OFFSET(cs) = code_offset; +#endif + switch ( CHARSET_TYPE (cs) ) { case CHARSET_TYPE_94: @@ -1845,6 +1465,15 @@ make_charset (Charset_ID id, Lisp_Object name, unsigned char rep_bytes, #endif } +#ifndef UTF2000 + if (id == LEADING_BYTE_ASCII) + CHARSET_REP_BYTES (cs) = 1; + else if (id < 0xA0) + CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 1; + else + CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 2; +#endif + if (final) { /* some charsets do not have final characters. 
This includes @@ -1861,6 +1490,11 @@ make_charset (Charset_ID id, Lisp_Object name, unsigned char rep_bytes, assert (NILP (chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE])); chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE] = obj; +#ifndef UTF2000 + if (id < 0xA0) + /* official leading byte */ + rep_bytes_by_first_byte[id] = CHARSET_REP_BYTES (cs); +#endif /* Some charsets are "faux" and don't have names or really exist at all except in the leading-byte table. */ @@ -1897,6 +1531,87 @@ get_unallocated_leading_byte (int dimension) return lb; } +#ifdef UTF2000 +unsigned char +charset_get_byte1 (Lisp_Object charset, Emchar ch) +{ + Emchar* table = XCHARSET_DECODING_TABLE (charset); + int d; + + if ( (table != NULL) && + (XCHARSET_DIMENSION (charset) == 1) && + ( (d = get_byte_from_character_table (ch, charset)) >= 0) ) + return d; + else if ((CHARSET_UCS_MIN (XCHARSET (charset)) <= ch) + && (ch <= CHARSET_UCS_MAX (XCHARSET (charset)))) + return ch - CHARSET_UCS_MIN (XCHARSET (charset)) + + CHARSET_CODE_OFFSET (XCHARSET (charset)); + else if (XCHARSET_DIMENSION (charset) == 1) + { + if (XCHARSET_CHARS (charset) == 94) + { + if (((d = ch - (MIN_CHAR_94 + + (XCHARSET_FINAL (charset) - '0') * 94)) >= 0) + && (d < 94)) + return d + 33; /* 94-sets use offset 33, as in the 94x94 case below */ + } + else if (XCHARSET_CHARS (charset) == 96) + { + if (((d = ch - (MIN_CHAR_96 + + (XCHARSET_FINAL (charset) - '0') * 96)) >= 0) + && (d < 96)) + return d + 32; /* 96-sets use offset 32, matching the CHAR96 macro */ + } + else + return 0; + } + else if (XCHARSET_DIMENSION (charset) == 2) + { + if (XCHARSET_CHARS (charset) == 94) + { + if (((d = ch - (MIN_CHAR_94x94 + + (XCHARSET_FINAL (charset) - '0') * 94 * 94)) >= 0) + && (d < 94 * 94)) + return (d / 94) + 33; + } + else if (XCHARSET_CHARS (charset) == 96) + { + if (((d = ch - (MIN_CHAR_96x96 + + (XCHARSET_FINAL (charset) - '0') * 96 * 96)) >= 0) + && (d < 96 * 96)) + return (d / 96) + 32; + } + } + return 0; +} + +unsigned char +charset_get_byte2 (Lisp_Object charset, Emchar ch) +{ + if (XCHARSET_DIMENSION (charset) == 1) + 
return 0; + else + { + if (EQ (charset, Vcharset_ucs_bmp)) + return (ch >> 8) & 0xff; + else if (XCHARSET_CHARS (charset) == 94) + return (MIN_CHAR_94x94 + + (XCHARSET_FINAL (charset) - '0') * 94 * 94 <= ch) + && (ch < MIN_CHAR_94x94 + + (XCHARSET_FINAL (charset) - '0' + 1) * 94 * 94) ? + ((ch - MIN_CHAR_94x94) % 94) + 33 : 0; + else /* if (XCHARSET_CHARS (charset) == 96) */ + return (MIN_CHAR_96x96 + + (XCHARSET_FINAL (charset) - '0') * 96 * 96 <= ch) + && (ch < MIN_CHAR_96x96 + + (XCHARSET_FINAL (charset) - '0' + 1) * 96 * 96) ? + ((ch - MIN_CHAR_96x96) % 96) + 32 : 0; + } +} + +Lisp_Object Vdefault_coded_charset_priority_list; +#endif + /************************************************************************/ /* Basic charset Lisp functions */ @@ -2208,8 +1923,11 @@ character set. Recognized properties are: if (columns == -1) columns = dimension; - charset = make_charset (id, name, dimension + 2, type, columns, graphic, - final, direction, short_name, long_name, doc_string, registry); + charset = make_charset (id, name, type, columns, graphic, + final, direction, short_name, long_name, + doc_string, registry, + NULL, + 0, 0, 0); if (!NILP (ccl_program)) XCHARSET_CCL_PROGRAM (charset) = ccl_program; return charset; @@ -2254,9 +1972,11 @@ NEW-NAME is the name of the new charset. Return the new charset. long_name = CHARSET_LONG_NAME (cs); registry = CHARSET_REGISTRY (cs); - new_charset = make_charset (id, new_name, dimension + 2, type, columns, + new_charset = make_charset (id, new_name, type, columns, graphic, final, direction, short_name, long_name, - doc_string, registry); + doc_string, registry, + NULL, + 0, 0, 0); CHARSET_REVERSE_DIRECTION_CHARSET (cs) = new_charset; XCHARSET_REVERSE_DIRECTION_CHARSET (new_charset) = charset; @@ -2789,10 +2509,16 @@ Leading-code of private TYPE9N charset of column-width 1. 
#endif #ifdef UTF2000 - Vutf_2000_version = build_string("0.6 (Tōbushijō-mae)"); + Vutf_2000_version = build_string("0.7 (Hirano)"); DEFVAR_LISP ("utf-2000-version", &Vutf_2000_version /* Version number of UTF-2000. */ ); + + Vdefault_coded_charset_priority_list = Qnil; + DEFVAR_LISP ("default-coded-charset-priority-list", + &Vdefault_coded_charset_priority_list /* +Default order of preferred coded-character-set. +*/ ); #endif } @@ -2809,296 +2535,331 @@ complex_vars_of_mule_charset (void) #ifdef UTF2000 staticpro (&Vcharset_ucs_bmp); Vcharset_ucs_bmp = - make_charset (LEADING_BYTE_UCS_BMP, Qucs_bmp, 1, + make_charset (LEADING_BYTE_UCS_BMP, Qucs_bmp, CHARSET_TYPE_256X256, 1, 0, 0, CHARSET_LEFT_TO_RIGHT, build_string ("BMP"), build_string ("BMP"), build_string ("BMP"), - build_string ("")); + build_string (""), + NULL, 0, 0xFFFF, 0); #endif staticpro (&Vcharset_ascii); Vcharset_ascii = - make_charset (LEADING_BYTE_ASCII, Qascii, 1, + make_charset (LEADING_BYTE_ASCII, Qascii, CHARSET_TYPE_94, 1, 0, 'B', CHARSET_LEFT_TO_RIGHT, build_string ("ASCII"), build_string ("ASCII)"), build_string ("ASCII (ISO646 IRV)"), - build_string ("\\(iso8859-[0-9]*\\|-ascii\\)")); + build_string ("\\(iso8859-[0-9]*\\|-ascii\\)"), + NULL, 0, 0x7F, 0); staticpro (&Vcharset_control_1); Vcharset_control_1 = - make_charset (LEADING_BYTE_CONTROL_1, Qcontrol_1, 2, + make_charset (LEADING_BYTE_CONTROL_1, Qcontrol_1, CHARSET_TYPE_94, 1, 1, 0, CHARSET_LEFT_TO_RIGHT, build_string ("C1"), build_string ("Control characters"), build_string ("Control characters 128-191"), - build_string ("")); + build_string (""), + NULL, 0x80, 0x9F, 0); staticpro (&Vcharset_latin_iso8859_1); Vcharset_latin_iso8859_1 = - make_charset (LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1, 2, + make_charset (LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1, CHARSET_TYPE_96, 1, 1, 'A', CHARSET_LEFT_TO_RIGHT, build_string ("Latin-1"), build_string ("ISO8859-1 (Latin-1)"), build_string ("ISO8859-1 (Latin-1)"), - build_string 
("iso8859-1")); + build_string ("iso8859-1"), + NULL, 0xA0, 0xFF, 32); staticpro (&Vcharset_latin_iso8859_2); Vcharset_latin_iso8859_2 = - make_charset (LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2, 2, + make_charset (LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2, CHARSET_TYPE_96, 1, 1, 'B', CHARSET_LEFT_TO_RIGHT, build_string ("Latin-2"), build_string ("ISO8859-2 (Latin-2)"), build_string ("ISO8859-2 (Latin-2)"), - build_string ("iso8859-2")); + build_string ("iso8859-2"), + latin_iso8859_2_to_ucs, 0, 0, 32); staticpro (&Vcharset_latin_iso8859_3); Vcharset_latin_iso8859_3 = - make_charset (LEADING_BYTE_LATIN_ISO8859_3, Qlatin_iso8859_3, 2, + make_charset (LEADING_BYTE_LATIN_ISO8859_3, Qlatin_iso8859_3, CHARSET_TYPE_96, 1, 1, 'C', CHARSET_LEFT_TO_RIGHT, build_string ("Latin-3"), build_string ("ISO8859-3 (Latin-3)"), build_string ("ISO8859-3 (Latin-3)"), - build_string ("iso8859-3")); + build_string ("iso8859-3"), + latin_iso8859_3_to_ucs, 0, 0, 32); staticpro (&Vcharset_latin_iso8859_4); Vcharset_latin_iso8859_4 = - make_charset (LEADING_BYTE_LATIN_ISO8859_4, Qlatin_iso8859_4, 2, + make_charset (LEADING_BYTE_LATIN_ISO8859_4, Qlatin_iso8859_4, CHARSET_TYPE_96, 1, 1, 'D', CHARSET_LEFT_TO_RIGHT, build_string ("Latin-4"), build_string ("ISO8859-4 (Latin-4)"), build_string ("ISO8859-4 (Latin-4)"), - build_string ("iso8859-4")); + build_string ("iso8859-4"), + latin_iso8859_4_to_ucs, 0, 0, 32); staticpro (&Vcharset_thai_tis620); Vcharset_thai_tis620 = - make_charset (LEADING_BYTE_THAI_TIS620, Qthai_tis620, 2, + make_charset (LEADING_BYTE_THAI_TIS620, Qthai_tis620, CHARSET_TYPE_96, 1, 1, 'T', CHARSET_LEFT_TO_RIGHT, build_string ("TIS620"), build_string ("TIS620 (Thai)"), build_string ("TIS620.2529 (Thai)"), - build_string ("tis620")); + build_string ("tis620"), + NULL, MIN_CHAR_THAI, MAX_CHAR_THAI, 32); staticpro (&Vcharset_greek_iso8859_7); Vcharset_greek_iso8859_7 = - make_charset (LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7, 2, + make_charset 
(LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7, CHARSET_TYPE_96, 1, 1, 'F', CHARSET_LEFT_TO_RIGHT, build_string ("ISO8859-7"), build_string ("ISO8859-7 (Greek)"), build_string ("ISO8859-7 (Greek)"), - build_string ("iso8859-7")); + build_string ("iso8859-7"), + NULL, MIN_CHAR_GREEK, MAX_CHAR_GREEK, 32); staticpro (&Vcharset_arabic_iso8859_6); Vcharset_arabic_iso8859_6 = - make_charset (LEADING_BYTE_ARABIC_ISO8859_6, Qarabic_iso8859_6, 2, + make_charset (LEADING_BYTE_ARABIC_ISO8859_6, Qarabic_iso8859_6, CHARSET_TYPE_96, 1, 1, 'G', CHARSET_RIGHT_TO_LEFT, build_string ("ISO8859-6"), build_string ("ISO8859-6 (Arabic)"), build_string ("ISO8859-6 (Arabic)"), - build_string ("iso8859-6")); + build_string ("iso8859-6"), + NULL, 0, 0, 32); staticpro (&Vcharset_hebrew_iso8859_8); Vcharset_hebrew_iso8859_8 = - make_charset (LEADING_BYTE_HEBREW_ISO8859_8, Qhebrew_iso8859_8, 2, + make_charset (LEADING_BYTE_HEBREW_ISO8859_8, Qhebrew_iso8859_8, CHARSET_TYPE_96, 1, 1, 'H', CHARSET_RIGHT_TO_LEFT, build_string ("ISO8859-8"), build_string ("ISO8859-8 (Hebrew)"), build_string ("ISO8859-8 (Hebrew)"), - build_string ("iso8859-8")); + build_string ("iso8859-8"), + NULL, MIN_CHAR_HEBREW, MAX_CHAR_HEBREW, 32); staticpro (&Vcharset_katakana_jisx0201); Vcharset_katakana_jisx0201 = - make_charset (LEADING_BYTE_KATAKANA_JISX0201, Qkatakana_jisx0201, 2, + make_charset (LEADING_BYTE_KATAKANA_JISX0201, Qkatakana_jisx0201, CHARSET_TYPE_94, 1, 1, 'I', CHARSET_LEFT_TO_RIGHT, build_string ("JISX0201 Kana"), build_string ("JISX0201.1976 (Japanese Kana)"), build_string ("JISX0201.1976 Japanese Kana"), - build_string ("jisx0201.1976")); + build_string ("jisx0201.1976"), + NULL, + MIN_CHAR_HALFWIDTH_KATAKANA, + MAX_CHAR_HALFWIDTH_KATAKANA, 33); staticpro (&Vcharset_latin_jisx0201); Vcharset_latin_jisx0201 = - make_charset (LEADING_BYTE_LATIN_JISX0201, Qlatin_jisx0201, 2, + make_charset (LEADING_BYTE_LATIN_JISX0201, Qlatin_jisx0201, CHARSET_TYPE_94, 1, 0, 'J', CHARSET_LEFT_TO_RIGHT, build_string 
("JISX0201 Roman"), build_string ("JISX0201.1976 (Japanese Roman)"), build_string ("JISX0201.1976 Japanese Roman"), - build_string ("jisx0201.1976")); + build_string ("jisx0201.1976"), + latin_jisx0201_to_ucs, 0, 0, 33); staticpro (&Vcharset_cyrillic_iso8859_5); Vcharset_cyrillic_iso8859_5 = - make_charset (LEADING_BYTE_CYRILLIC_ISO8859_5, Qcyrillic_iso8859_5, 2, + make_charset (LEADING_BYTE_CYRILLIC_ISO8859_5, Qcyrillic_iso8859_5, CHARSET_TYPE_96, 1, 1, 'L', CHARSET_LEFT_TO_RIGHT, build_string ("ISO8859-5"), build_string ("ISO8859-5 (Cyrillic)"), build_string ("ISO8859-5 (Cyrillic)"), - build_string ("iso8859-5")); + build_string ("iso8859-5"), + NULL, MIN_CHAR_CYRILLIC, MAX_CHAR_CYRILLIC, 32); staticpro (&Vcharset_latin_iso8859_9); Vcharset_latin_iso8859_9 = - make_charset (LEADING_BYTE_LATIN_ISO8859_9, Qlatin_iso8859_9, 2, + make_charset (LEADING_BYTE_LATIN_ISO8859_9, Qlatin_iso8859_9, CHARSET_TYPE_96, 1, 1, 'M', CHARSET_LEFT_TO_RIGHT, build_string ("Latin-5"), build_string ("ISO8859-9 (Latin-5)"), build_string ("ISO8859-9 (Latin-5)"), - build_string ("iso8859-9")); + build_string ("iso8859-9"), + latin_iso8859_9_to_ucs, 0, 0, 32); staticpro (&Vcharset_japanese_jisx0208_1978); Vcharset_japanese_jisx0208_1978 = - make_charset (LEADING_BYTE_JAPANESE_JISX0208_1978, Qjapanese_jisx0208_1978, 3, + make_charset (LEADING_BYTE_JAPANESE_JISX0208_1978, Qjapanese_jisx0208_1978, CHARSET_TYPE_94X94, 2, 0, '@', CHARSET_LEFT_TO_RIGHT, build_string ("JISX0208.1978"), build_string ("JISX0208.1978 (Japanese)"), build_string ("JISX0208.1978 Japanese Kanji (so called \"old JIS\")"), - build_string ("\\(jisx0208\\|jisc6226\\)\\.1978")); + build_string ("\\(jisx0208\\|jisc6226\\)\\.1978"), + NULL, 0, 0, 33); staticpro (&Vcharset_chinese_gb2312); Vcharset_chinese_gb2312 = - make_charset (LEADING_BYTE_CHINESE_GB2312, Qchinese_gb2312, 3, + make_charset (LEADING_BYTE_CHINESE_GB2312, Qchinese_gb2312, CHARSET_TYPE_94X94, 2, 0, 'A', CHARSET_LEFT_TO_RIGHT, build_string ("GB2312"), 
build_string ("GB2312)"), build_string ("GB2312 Chinese simplified"), - build_string ("gb2312")); + build_string ("gb2312"), + NULL, 0, 0, 33); staticpro (&Vcharset_japanese_jisx0208); Vcharset_japanese_jisx0208 = - make_charset (LEADING_BYTE_JAPANESE_JISX0208, Qjapanese_jisx0208, 3, + make_charset (LEADING_BYTE_JAPANESE_JISX0208, Qjapanese_jisx0208, CHARSET_TYPE_94X94, 2, 0, 'B', CHARSET_LEFT_TO_RIGHT, build_string ("JISX0208"), build_string ("JISX0208.1983/1990 (Japanese)"), build_string ("JISX0208.1983/1990 Japanese Kanji"), - build_string ("jisx0208.19\\(83\\|90\\)")); + build_string ("jisx0208.19\\(83\\|90\\)"), + NULL, 0, 0, 33); staticpro (&Vcharset_korean_ksc5601); Vcharset_korean_ksc5601 = - make_charset (LEADING_BYTE_KOREAN_KSC5601, Qkorean_ksc5601, 3, + make_charset (LEADING_BYTE_KOREAN_KSC5601, Qkorean_ksc5601, CHARSET_TYPE_94X94, 2, 0, 'C', CHARSET_LEFT_TO_RIGHT, build_string ("KSC5601"), build_string ("KSC5601 (Korean"), build_string ("KSC5601 Korean Hangul and Hanja"), - build_string ("ksc5601")); + build_string ("ksc5601"), + NULL, 0, 0, 33); staticpro (&Vcharset_japanese_jisx0212); Vcharset_japanese_jisx0212 = - make_charset (LEADING_BYTE_JAPANESE_JISX0212, Qjapanese_jisx0212, 3, + make_charset (LEADING_BYTE_JAPANESE_JISX0212, Qjapanese_jisx0212, CHARSET_TYPE_94X94, 2, 0, 'D', CHARSET_LEFT_TO_RIGHT, build_string ("JISX0212"), build_string ("JISX0212 (Japanese)"), build_string ("JISX0212 Japanese Supplement"), - build_string ("jisx0212")); + build_string ("jisx0212"), + NULL, 0, 0, 33); #define CHINESE_CNS_PLANE_RE(n) "cns11643[.-]\\(.*[.-]\\)?" 
n "$" staticpro (&Vcharset_chinese_cns11643_1); Vcharset_chinese_cns11643_1 = - make_charset (LEADING_BYTE_CHINESE_CNS11643_1, Qchinese_cns11643_1, 3, + make_charset (LEADING_BYTE_CHINESE_CNS11643_1, Qchinese_cns11643_1, CHARSET_TYPE_94X94, 2, 0, 'G', CHARSET_LEFT_TO_RIGHT, build_string ("CNS11643-1"), build_string ("CNS11643-1 (Chinese traditional)"), build_string ("CNS 11643 Plane 1 Chinese traditional"), - build_string (CHINESE_CNS_PLANE_RE("1"))); + build_string (CHINESE_CNS_PLANE_RE("1")), + NULL, 0, 0, 33); staticpro (&Vcharset_chinese_cns11643_2); Vcharset_chinese_cns11643_2 = - make_charset (LEADING_BYTE_CHINESE_CNS11643_2, Qchinese_cns11643_2, 3, + make_charset (LEADING_BYTE_CHINESE_CNS11643_2, Qchinese_cns11643_2, CHARSET_TYPE_94X94, 2, 0, 'H', CHARSET_LEFT_TO_RIGHT, build_string ("CNS11643-2"), build_string ("CNS11643-2 (Chinese traditional)"), build_string ("CNS 11643 Plane 2 Chinese traditional"), - build_string (CHINESE_CNS_PLANE_RE("2"))); + build_string (CHINESE_CNS_PLANE_RE("2")), + NULL, 0, 0, 33); #ifdef UTF2000 staticpro (&Vcharset_chinese_cns11643_3); Vcharset_chinese_cns11643_3 = - make_charset (LEADING_BYTE_CHINESE_CNS11643_3, Qchinese_cns11643_3, 3, + make_charset (LEADING_BYTE_CHINESE_CNS11643_3, Qchinese_cns11643_3, CHARSET_TYPE_94X94, 2, 0, 'I', CHARSET_LEFT_TO_RIGHT, build_string ("CNS11643-3"), build_string ("CNS11643-3 (Chinese traditional)"), build_string ("CNS 11643 Plane 3 Chinese traditional"), - build_string (CHINESE_CNS_PLANE_RE("3"))); + build_string (CHINESE_CNS_PLANE_RE("3")), + NULL, 0, 0, 33); staticpro (&Vcharset_chinese_cns11643_4); Vcharset_chinese_cns11643_4 = - make_charset (LEADING_BYTE_CHINESE_CNS11643_4, Qchinese_cns11643_4, 3, + make_charset (LEADING_BYTE_CHINESE_CNS11643_4, Qchinese_cns11643_4, CHARSET_TYPE_94X94, 2, 0, 'J', CHARSET_LEFT_TO_RIGHT, build_string ("CNS11643-4"), build_string ("CNS11643-4 (Chinese traditional)"), build_string ("CNS 11643 Plane 4 Chinese traditional"), - build_string 
(CHINESE_CNS_PLANE_RE("4"))); + build_string (CHINESE_CNS_PLANE_RE("4")), + NULL, 0, 0, 33); staticpro (&Vcharset_chinese_cns11643_5); Vcharset_chinese_cns11643_5 = - make_charset (LEADING_BYTE_CHINESE_CNS11643_5, Qchinese_cns11643_5, 3, + make_charset (LEADING_BYTE_CHINESE_CNS11643_5, Qchinese_cns11643_5, CHARSET_TYPE_94X94, 2, 0, 'K', CHARSET_LEFT_TO_RIGHT, build_string ("CNS11643-5"), build_string ("CNS11643-5 (Chinese traditional)"), build_string ("CNS 11643 Plane 5 Chinese traditional"), - build_string (CHINESE_CNS_PLANE_RE("5"))); + build_string (CHINESE_CNS_PLANE_RE("5")), + NULL, 0, 0, 33); staticpro (&Vcharset_chinese_cns11643_6); Vcharset_chinese_cns11643_6 = - make_charset (LEADING_BYTE_CHINESE_CNS11643_6, Qchinese_cns11643_6, 3, + make_charset (LEADING_BYTE_CHINESE_CNS11643_6, Qchinese_cns11643_6, CHARSET_TYPE_94X94, 2, 0, 'L', CHARSET_LEFT_TO_RIGHT, build_string ("CNS11643-6"), build_string ("CNS11643-6 (Chinese traditional)"), build_string ("CNS 11643 Plane 6 Chinese traditional"), - build_string (CHINESE_CNS_PLANE_RE("6"))); + build_string (CHINESE_CNS_PLANE_RE("6")), + NULL, 0, 0, 33); staticpro (&Vcharset_chinese_cns11643_7); Vcharset_chinese_cns11643_7 = - make_charset (LEADING_BYTE_CHINESE_CNS11643_7, Qchinese_cns11643_7, 3, + make_charset (LEADING_BYTE_CHINESE_CNS11643_7, Qchinese_cns11643_7, CHARSET_TYPE_94X94, 2, 0, 'M', CHARSET_LEFT_TO_RIGHT, build_string ("CNS11643-7"), build_string ("CNS11643-7 (Chinese traditional)"), build_string ("CNS 11643 Plane 7 Chinese traditional"), - build_string (CHINESE_CNS_PLANE_RE("7"))); + build_string (CHINESE_CNS_PLANE_RE("7")), + NULL, 0, 0, 33); + staticpro (&Vcharset_latin_viscii_lower); Vcharset_latin_viscii_lower = - make_charset (LEADING_BYTE_LATIN_VISCII_LOWER, Qlatin_viscii_lower, 2, + make_charset (LEADING_BYTE_LATIN_VISCII_LOWER, Qlatin_viscii_lower, CHARSET_TYPE_96, 1, 1, '1', CHARSET_LEFT_TO_RIGHT, build_string ("VISCII lower"), build_string ("VISCII lower (Vietnamese)"), build_string ("VISCII 
lower (Vietnamese)"), - build_string ("VISCII1.1")); + build_string ("VISCII1.1"), + latin_viscii_lower_to_ucs, 0, 0, 32); + staticpro (&Vcharset_latin_viscii_upper); Vcharset_latin_viscii_upper = - make_charset (LEADING_BYTE_LATIN_VISCII_UPPER, Qlatin_viscii_upper, 2, + make_charset (LEADING_BYTE_LATIN_VISCII_UPPER, Qlatin_viscii_upper, CHARSET_TYPE_96, 1, 1, '2', CHARSET_LEFT_TO_RIGHT, build_string ("VISCII upper"), build_string ("VISCII upper (Vietnamese)"), build_string ("VISCII upper (Vietnamese)"), - build_string ("VISCII1.1")); + build_string ("VISCII1.1"), + latin_viscii_upper_to_ucs, 0, 0, 32); #endif staticpro (&Vcharset_chinese_big5_1); Vcharset_chinese_big5_1 = - make_charset (LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1, 3, + make_charset (LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1, CHARSET_TYPE_94X94, 2, 0, '0', CHARSET_LEFT_TO_RIGHT, build_string ("Big5"), build_string ("Big5 (Level-1)"), build_string ("Big5 Level-1 Chinese traditional"), - build_string ("big5")); + build_string ("big5"), + NULL, 0, 0, 33); staticpro (&Vcharset_chinese_big5_2); Vcharset_chinese_big5_2 = - make_charset (LEADING_BYTE_CHINESE_BIG5_2, Qchinese_big5_2, 3, + make_charset (LEADING_BYTE_CHINESE_BIG5_2, Qchinese_big5_2, CHARSET_TYPE_94X94, 2, 0, '1', CHARSET_LEFT_TO_RIGHT, build_string ("Big5"), build_string ("Big5 (Level-2)"), build_string ("Big5 Level-2 Chinese traditional"), - build_string ("big5")); + build_string ("big5"), + NULL, 0, 0, 33); #ifdef ENABLE_COMPOSITE_CHARS /* #### For simplicity, we put composite chars into a 96x96 charset. @@ -3106,7 +2867,7 @@ complex_vars_of_mule_charset (void) room, esp. as we don't yet recycle numbers. 
*/ staticpro (&Vcharset_composite); Vcharset_composite = - make_charset (LEADING_BYTE_COMPOSITE, Qcomposite, 3, + make_charset (LEADING_BYTE_COMPOSITE, Qcomposite, CHARSET_TYPE_96X96, 2, 0, 0, CHARSET_LEFT_TO_RIGHT, build_string ("Composite"), diff --git a/src/objects-tty.c b/src/objects-tty.c index d8f1a34..ea76e6f 100644 --- a/src/objects-tty.c +++ b/src/objects-tty.c @@ -242,7 +242,7 @@ tty_initialize_font_instance (Lisp_Font_Instance *f, Lisp_Object name, FONT_INSTANCE_TTY_CHARSET (f) = charset; #ifdef MULE if (CHARSETP (charset)) - f->width = XCHARSET_COLUMNS (charset); + f->width = CHARSET_COLUMNS (XCHARSET (charset)); else #endif f->width = 1; diff --git a/src/text-coding.c b/src/text-coding.c index 8b20b52..3a8c3fe 100644 --- a/src/text-coding.c +++ b/src/text-coding.c @@ -186,8 +186,11 @@ static int detect_coding_sjis (struct detection_state *st, const Extbyte *src, Lstream_data_count n); static void decode_coding_sjis (Lstream *decoding, const Extbyte *src, unsigned_char_dynarr *dst, Lstream_data_count n); -static void encode_coding_sjis (Lstream *encoding, const Bufbyte *src, - unsigned_char_dynarr *dst, Lstream_data_count n); +void char_encode_shift_jis (struct encoding_stream *str, Emchar c, + unsigned_char_dynarr *dst, unsigned int *flags); +void char_finish_shift_jis (struct encoding_stream *str, + unsigned_char_dynarr *dst, unsigned int *flags); + static int detect_coding_big5 (struct detection_state *st, const Extbyte *src, Lstream_data_count n); static void decode_coding_big5 (Lstream *decoding, const Extbyte *src, @@ -198,8 +201,11 @@ static int detect_coding_ucs4 (struct detection_state *st, const Extbyte *src, Lstream_data_count n); static void decode_coding_ucs4 (Lstream *decoding, const Extbyte *src, unsigned_char_dynarr *dst, Lstream_data_count n); -static void encode_coding_ucs4 (Lstream *encoding, const Bufbyte *src, - unsigned_char_dynarr *dst, Lstream_data_count n); +void char_encode_ucs4 (struct encoding_stream *str, Emchar c, + 
unsigned_char_dynarr *dst, unsigned int *flags); +void char_finish_ucs4 (struct encoding_stream *str, + unsigned_char_dynarr *dst, unsigned int *flags); + static int detect_coding_utf8 (struct detection_state *st, const Extbyte *src, Lstream_data_count n); static void decode_coding_utf8 (Lstream *decoding, const Extbyte *src, @@ -904,10 +910,6 @@ if TYPE is 'ccl: CHECK_STRING (doc_string); CODING_SYSTEM_DOC_STRING (codesys) = doc_string; -#ifdef UTF2000 - if (ty == CODESYS_NO_CONVERSION) - codesys->fixed.size = 1; -#endif { EXTERNAL_PROPERTY_LIST_LOOP_3 (key, value, props) { @@ -2817,6 +2819,15 @@ reset_encoding_stream (struct encoding_stream *str) case CODESYS_UTF8: str->encode_char = &char_encode_utf8; str->finish = &char_finish_utf8; + break; + case CODESYS_UCS4: + str->encode_char = &char_encode_ucs4; + str->finish = &char_finish_ucs4; + break; + case CODESYS_SHIFT_JIS: + str->encode_char = &char_encode_shift_jis; + str->finish = &char_finish_shift_jis; + break; default: break; } @@ -2932,15 +2943,9 @@ mule_encode (Lstream *encoding, const Bufbyte *src, encode_coding_no_conversion (encoding, src, dst, n); break; #ifdef MULE - case CODESYS_SHIFT_JIS: - encode_coding_sjis (encoding, src, dst, n); - break; case CODESYS_BIG5: encode_coding_big5 (encoding, src, dst, n); break; - case CODESYS_UCS4: - encode_coding_ucs4 (encoding, src, dst, n); - break; case CODESYS_CCL: str->ccl.last_block = str->flags & CODING_STATE_END; /* When applying ccl program to stream, MUST NOT set NULL @@ -3030,64 +3035,57 @@ text_encode_generic (Lstream *encoding, const Bufbyte *src, struct encoding_stream *str = ENCODING_STREAM_DATA (encoding); unsigned int flags = str->flags; Emchar ch = str->ch; - Lisp_Object charset; - int half; char_boundary = str->iso2022.current_char_boundary; - charset = str->iso2022.current_charset; - half = str->iso2022.current_half; while (n--) { c = *src++; - switch (char_boundary) + if (char_boundary == 0) { - case 0: - if ( c >= 0xfc ) + if (c >= 0xfc) { ch = 
c & 0x01; char_boundary = 5; } - else if ( c >= 0xf8 ) + else if (c >= 0xf8) { ch = c & 0x03; char_boundary = 4; } - else if ( c >= 0xf0 ) + else if (c >= 0xf0) { ch = c & 0x07; char_boundary = 3; } - else if ( c >= 0xe0 ) + else if (c >= 0xe0) { ch = c & 0x0f; char_boundary = 2; } - else if ( c >= 0xc0 ) + else if (c >= 0xc0) { ch = c & 0x1f; char_boundary = 1; } else - { - (*str->encode_char) (str, c, dst, &flags); - ch = 0; - char_boundary = 0; - } - break; - case 1: + (*str->encode_char) (str, c, dst, &flags); + } + else if (char_boundary == 1) + { (*str->encode_char) (str, (ch << 6) | (c & 0x3f), dst, &flags); ch =0; char_boundary = 0; - break; - default: - ch = ( ch << 6 ) | ( c & 0x3f ); + } + else + { + ch = (ch << 6) | (c & 0x3f); char_boundary--; } } - if ( (char_boundary == 0) && flags & CODING_STATE_END) + if ((char_boundary == 0) && (flags & CODING_STATE_END)) { (*str->finish) (str, dst, &flags); } @@ -3095,10 +3093,6 @@ text_encode_generic (Lstream *encoding, const Bufbyte *src, str->flags = flags; str->ch = ch; str->iso2022.current_char_boundary = char_boundary; - str->iso2022.current_charset = charset; - str->iso2022.current_half = half; - - /* Verbum caro factum est! */ } @@ -3212,6 +3206,11 @@ decode_coding_sjis (Lstream *decoding, const Extbyte *src, Dynarr_add (dst, c); #endif } +#ifdef UTF2000 + else if (c > 32) + DECODE_ADD_UCS_CHAR(MAKE_CHAR(Vcharset_latin_jisx0201, + c, 0), dst); +#endif else DECODE_ADD_BINARY_CHAR (c, dst); } @@ -3224,136 +3223,61 @@ decode_coding_sjis (Lstream *decoding, const Extbyte *src, str->ch = ch; } -/* Convert internally-formatted data to Shift-JIS. */ +/* Convert internal character representation to Shift_JIS. 
*/ -static void -encode_coding_sjis (Lstream *encoding, const Bufbyte *src, - unsigned_char_dynarr *dst, Lstream_data_count n) +void +char_encode_shift_jis (struct encoding_stream *str, Emchar ch, + unsigned_char_dynarr *dst, unsigned int *flags) { - struct encoding_stream *str = ENCODING_STREAM_DATA (encoding); - unsigned int flags = str->flags; - unsigned int ch = str->ch; eol_type_t eol_type = CODING_SYSTEM_EOL_TYPE (str->codesys); -#ifdef UTF2000 - unsigned char char_boundary = str->iso2022.current_char_boundary; -#endif - while (n--) + if (ch == '\n') { - Bufbyte c = *src++; + if (eol_type != EOL_LF && eol_type != EOL_AUTODETECT) + Dynarr_add (dst, '\r'); + if (eol_type != EOL_CR) + Dynarr_add (dst, ch); + } + else + { + Lisp_Object charset; + unsigned int c1, c2, s1, s2; + #ifdef UTF2000 - switch (char_boundary) + if ( (c1 = + get_byte_from_character_table (ch, Vcharset_latin_jisx0201)) + >= 0 ) { - case 0: - if ( c >= 0xfc ) - { - ch = c & 0x01; - char_boundary = 5; - } - else if ( c >= 0xf8 ) - { - ch = c & 0x03; - char_boundary = 4; - } - else if ( c >= 0xf0 ) - { - ch = c & 0x07; - char_boundary = 3; - } - else if ( c >= 0xe0 ) - { - ch = c & 0x0f; - char_boundary = 2; - } - else if ( c >= 0xc0 ) - { - ch = c & 0x1f; - char_boundary = 1; - } - else - { - ch = 0; - if (c == '\n') - { - if (eol_type != EOL_LF && eol_type != EOL_AUTODETECT) - Dynarr_add (dst, '\r'); - if (eol_type != EOL_CR) - Dynarr_add (dst, c); - } - else - Dynarr_add (dst, c); - char_boundary = 0; - } - break; - case 1: - ch = ( ch << 6 ) | ( c & 0x3f ); - { - Lisp_Object charset; - unsigned int c1, c2, s1, s2; - - BREAKUP_CHAR (ch, charset, c1, c2); - if (EQ(charset, Vcharset_katakana_jisx0201)) - { - Dynarr_add (dst, c1 | 0x80); - } - else if (EQ(charset, Vcharset_japanese_jisx0208)) - { - ENCODE_SJIS (c1 | 0x80, c2 | 0x80, s1, s2); - Dynarr_add (dst, s1); - Dynarr_add (dst, s2); - } - } - char_boundary = 0; - break; - default: - ch = ( ch << 6 ) | ( c & 0x3f ); - char_boundary--; + 
charset = Vcharset_latin_jisx0201; + c2 = 0; } -#else - if (c == '\n') + else +#endif + BREAKUP_CHAR (ch, charset, c1, c2); + + if (EQ(charset, Vcharset_katakana_jisx0201)) { - if (eol_type != EOL_LF && eol_type != EOL_AUTODETECT) - Dynarr_add (dst, '\r'); - if (eol_type != EOL_CR) - Dynarr_add (dst, '\n'); - ch = 0; + Dynarr_add (dst, c1 | 0x80); } - else if (BYTE_ASCII_P (c)) + else if (c2 == 0) { - Dynarr_add (dst, c); - ch = 0; + Dynarr_add (dst, c1); } - else if (BUFBYTE_LEADING_BYTE_P (c)) - ch = (c == LEADING_BYTE_KATAKANA_JISX0201 || - c == LEADING_BYTE_JAPANESE_JISX0208_1978 || - c == LEADING_BYTE_JAPANESE_JISX0208) ? c : 0; - else if (ch) + else if (EQ(charset, Vcharset_japanese_jisx0208)) { - if (ch == LEADING_BYTE_KATAKANA_JISX0201) - { - Dynarr_add (dst, c); - ch = 0; - } - else if (ch == LEADING_BYTE_JAPANESE_JISX0208_1978 || - ch == LEADING_BYTE_JAPANESE_JISX0208) - ch = c; - else - { - unsigned char j1, j2; - ENCODE_SJIS (ch, c, j1, j2); - Dynarr_add (dst, j1); - Dynarr_add (dst, j2); - ch = 0; - } + ENCODE_SJIS (c1 | 0x80, c2 | 0x80, s1, s2); + Dynarr_add (dst, s1); + Dynarr_add (dst, s2); } -#endif + else + Dynarr_add (dst, '?'); } +} - str->flags = flags; - str->ch = ch; -#ifdef UTF2000 - str->iso2022.current_char_boundary = char_boundary; -#endif +void +char_finish_shift_jis (struct encoding_stream *str, unsigned_char_dynarr *dst, + unsigned int *flags) +{ } DEFUN ("decode-shift-jis-char", Fdecode_shift_jis_char, 1, 1, 0, /* @@ -3692,164 +3616,8 @@ Return the corresponding character code in Big5. /************************************************************************/ /* UCS-4 methods */ -/* */ -/* UCS-4 character codes are implemented as nonnegative integers. */ -/* */ /************************************************************************/ - -DEFUN ("set-ucs-char", Fset_ucs_char, 2, 2, 0, /* -Map UCS-4 code CODE to Mule character CHARACTER. - -Return T on success, NIL on failure. 
-*/ - (code, character)) -{ - size_t c; - - CHECK_CHAR (character); - CHECK_NATNUM (code); - c = XINT (code); - - if (c < countof (fcd->ucs_to_mule_table)) - { - fcd->ucs_to_mule_table[c] = character; - return Qt; - } - else - return Qnil; -} - -static Lisp_Object -ucs_to_char (unsigned long code) -{ - if (code < countof (fcd->ucs_to_mule_table)) - { - return fcd->ucs_to_mule_table[code]; - } - else if ((0xe00000 <= code) && (code <= 0xe00000 + 94 * 94 * 14)) - { - unsigned int c; - - code -= 0xe00000; - c = code % (94 * 94); - return make_char - (MAKE_CHAR (CHARSET_BY_ATTRIBUTES - (CHARSET_TYPE_94X94, code / (94 * 94) + '@', - CHARSET_LEFT_TO_RIGHT), - c / 94 + 33, c % 94 + 33)); - } - else - return Qnil; -} - -DEFUN ("ucs-char", Fucs_char, 1, 1, 0, /* -Return Mule character corresponding to UCS code CODE (a positive integer). -*/ - (code)) -{ - CHECK_NATNUM (code); - return ucs_to_char (XINT (code)); -} - -DEFUN ("set-char-ucs", Fset_char_ucs, 2, 2, 0, /* -Map Mule character CHARACTER to UCS code CODE (a positive integer). -*/ - (character, code)) -{ - /* #### Isn't this gilding the lily? Fput_char_table checks its args. - Fset_char_ucs is more restrictive on index arg, but should - check code arg in a char_table method. */ - CHECK_CHAR (character); - CHECK_NATNUM (code); - return Fput_char_table (character, code, mule_to_ucs_table); -} - -DEFUN ("char-ucs", Fchar_ucs, 1, 1, 0, /* -Return the UCS code (a positive integer) corresponding to CHARACTER. -*/ - (character)) -{ - return Fget_char_table (character, mule_to_ucs_table); -} - -#ifdef UTF2000 -#define decode_ucs4 DECODE_ADD_UCS_CHAR -#else -/* Decode a UCS-4 character into a buffer. If the lookup fails, use - (U+3013) of JIS X 0208, which means correct character - is not found, instead. - #### do something more appropriate (use blob?) - Danger, Will Robinson! Data loss. Should we signal user? 
*/ -static void -decode_ucs4 (unsigned long ch, unsigned_char_dynarr *dst) -{ - Lisp_Object chr = ucs_to_char (ch); - - if (! NILP (chr)) - { - Bufbyte work[MAX_EMCHAR_LEN]; - int len; - - ch = XCHAR (chr); - len = (ch < 128) ? - simple_set_charptr_emchar (work, ch) : - non_ascii_set_charptr_emchar (work, ch); - Dynarr_add_many (dst, work, len); - } - else - { - Dynarr_add (dst, LEADING_BYTE_JAPANESE_JISX0208); - Dynarr_add (dst, 34 + 128); - Dynarr_add (dst, 46 + 128); - } -} -#endif - -static unsigned long -mule_char_to_ucs4 (Lisp_Object charset, - unsigned char h, unsigned char l) -{ - Lisp_Object code - = Fget_char_table (make_char (MAKE_CHAR (charset, h & 127, l & 127)), - mule_to_ucs_table); - - if (INTP (code)) - { - return XINT (code); - } - else if ( (XCHARSET_DIMENSION (charset) == 2) && - (XCHARSET_CHARS (charset) == 94) ) - { - unsigned char final = XCHARSET_FINAL (charset); - - if ( ('@' <= final) && (final < 0x7f) ) - { - return 0xe00000 + (final - '@') * 94 * 94 - + ((h & 127) - 33) * 94 + (l & 127) - 33; - } - else - { - return '?'; - } - } - else - { - return '?'; - } -} - -static void -encode_ucs4 (Lisp_Object charset, - unsigned char h, unsigned char l, unsigned_char_dynarr *dst) -{ - unsigned long code = mule_char_to_ucs4 (charset, h, l); - Dynarr_add (dst, code >> 24); - Dynarr_add (dst, (code >> 16) & 255); - Dynarr_add (dst, (code >> 8) & 255); - Dynarr_add (dst, code & 255); -} - static int detect_coding_ucs4 (struct detection_state *st, const Extbyte *src, Lstream_data_count n) { @@ -3893,7 +3661,7 @@ decode_coding_ucs4 (Lstream *decoding, const Extbyte *src, counter = 3; break; case 1: - decode_ucs4 ( ( ch << 8 ) | c, dst); + DECODE_ADD_UCS_CHAR ((ch << 8) | c, dst); ch = 0; counter = 0; break; @@ -3910,140 +3678,20 @@ decode_coding_ucs4 (Lstream *decoding, const Extbyte *src, str->counter = counter; } -static void -encode_coding_ucs4 (Lstream *encoding, const Bufbyte *src, - unsigned_char_dynarr *dst, Lstream_data_count n) +void 
+char_encode_ucs4 (struct encoding_stream *str, Emchar ch, + unsigned_char_dynarr *dst, unsigned int *flags) { -#ifndef UTF2000 - struct encoding_stream *str = ENCODING_STREAM_DATA (encoding); - unsigned int flags = str->flags; - unsigned int ch = str->ch; - unsigned char char_boundary = str->iso2022.current_char_boundary; - Lisp_Object charset = str->iso2022.current_charset; - -#ifdef ENABLE_COMPOSITE_CHARS - /* flags for handling composite chars. We do a little switcharoo - on the source while we're outputting the composite char. */ - unsigned int saved_n = 0; - const unsigned char *saved_src = NULL; - int in_composite = 0; - - back_to_square_n: -#endif - - while (n--) - { - unsigned char c = *src++; - - if (BYTE_ASCII_P (c)) - { /* Processing ASCII character */ - ch = 0; - encode_ucs4 (Vcharset_ascii, c, 0, dst); - char_boundary = 1; - } - else if (BUFBYTE_LEADING_BYTE_P (c) || BUFBYTE_LEADING_BYTE_P (ch)) - { /* Processing Leading Byte */ - ch = 0; - charset = CHARSET_BY_LEADING_BYTE (c); - if (LEADING_BYTE_PREFIX_P(c)) - ch = c; - char_boundary = 0; - } - else - { /* Processing Non-ASCII character */ - char_boundary = 1; - if (EQ (charset, Vcharset_control_1)) - { - encode_ucs4 (Vcharset_control_1, c, 0, dst); - } - else - { - switch (XCHARSET_REP_BYTES (charset)) - { - case 2: - encode_ucs4 (charset, c, 0, dst); - break; - case 3: - if (XCHARSET_PRIVATE_P (charset)) - { - encode_ucs4 (charset, c, 0, dst); - ch = 0; - } - else if (ch) - { -#ifdef ENABLE_COMPOSITE_CHARS - if (EQ (charset, Vcharset_composite)) - { - if (in_composite) - { - /* #### Bother! We don't know how to - handle this yet. 
*/ - Dynarr_add (dst, '\0'); - Dynarr_add (dst, '\0'); - Dynarr_add (dst, '\0'); - Dynarr_add (dst, '~'); - } - else - { - Emchar emch = MAKE_CHAR (Vcharset_composite, - ch & 0x7F, c & 0x7F); - Lisp_Object lstr = composite_char_string (emch); - saved_n = n; - saved_src = src; - in_composite = 1; - src = XSTRING_DATA (lstr); - n = XSTRING_LENGTH (lstr); - } - } - else -#endif /* ENABLE_COMPOSITE_CHARS */ - { - encode_ucs4(charset, ch, c, dst); - } - ch = 0; - } - else - { - ch = c; - char_boundary = 0; - } - break; - case 4: - if (ch) - { - encode_ucs4 (charset, ch, c, dst); - ch = 0; - } - else - { - ch = c; - char_boundary = 0; - } - break; - default: - abort (); - } - } - } - } - -#ifdef ENABLE_COMPOSITE_CHARS - if (in_composite) - { - n = saved_n; - src = saved_src; - in_composite = 0; - goto back_to_square_n; /* Wheeeeeeeee ..... */ - } -#endif /* ENABLE_COMPOSITE_CHARS */ - - str->flags = flags; - str->ch = ch; - str->iso2022.current_char_boundary = char_boundary; - str->iso2022.current_charset = charset; + Dynarr_add (dst, ch >> 24); + Dynarr_add (dst, ch >> 16); + Dynarr_add (dst, ch >> 8); + Dynarr_add (dst, ch ); +} - /* Verbum caro factum est! 
*/ -#endif +void +char_finish_ucs4 (struct encoding_stream *str, unsigned_char_dynarr *dst, + unsigned int *flags) +{ } @@ -4129,12 +3777,12 @@ decode_coding_utf8 (Lstream *decoding, const Extbyte *src, else { DECODE_HANDLE_EOL_TYPE (eol_type, c, flags, dst); - decode_ucs4 (c, dst); + DECODE_ADD_UCS_CHAR (c, dst); } break; case 1: ch = ( ch << 6 ) | ( c & 0x3f ); - decode_ucs4 (ch, dst); + DECODE_ADD_UCS_CHAR (ch, dst); ch = 0; counter = 0; break; @@ -4154,47 +3802,56 @@ decode_coding_utf8 (Lstream *decoding, const Extbyte *src, } void -char_encode_utf8 (struct encoding_stream *str, Emchar code, +char_encode_utf8 (struct encoding_stream *str, Emchar ch, unsigned_char_dynarr *dst, unsigned int *flags) { - if ( code <= 0x7f ) + eol_type_t eol_type = CODING_SYSTEM_EOL_TYPE (str->codesys); + + if (ch == '\n') + { + if (eol_type != EOL_LF && eol_type != EOL_AUTODETECT) + Dynarr_add (dst, '\r'); + if (eol_type != EOL_CR) + Dynarr_add (dst, ch); + } + else if (ch <= 0x7f) { - Dynarr_add (dst, code); + Dynarr_add (dst, ch); } - else if ( code <= 0x7ff ) + else if (ch <= 0x7ff) { - Dynarr_add (dst, (code >> 6) | 0xc0); - Dynarr_add (dst, (code & 0x3f) | 0x80); + Dynarr_add (dst, (ch >> 6) | 0xc0); + Dynarr_add (dst, (ch & 0x3f) | 0x80); } - else if ( code <= 0xffff ) + else if (ch <= 0xffff) { - Dynarr_add (dst, (code >> 12) | 0xe0); - Dynarr_add (dst, ((code >> 6) & 0x3f) | 0x80); - Dynarr_add (dst, (code & 0x3f) | 0x80); + Dynarr_add (dst, (ch >> 12) | 0xe0); + Dynarr_add (dst, ((ch >> 6) & 0x3f) | 0x80); + Dynarr_add (dst, (ch & 0x3f) | 0x80); } - else if ( code <= 0x1fffff ) + else if (ch <= 0x1fffff) { - Dynarr_add (dst, (code >> 18) | 0xf0); - Dynarr_add (dst, ((code >> 12) & 0x3f) | 0x80); - Dynarr_add (dst, ((code >> 6) & 0x3f) | 0x80); - Dynarr_add (dst, (code & 0x3f) | 0x80); + Dynarr_add (dst, (ch >> 18) | 0xf0); + Dynarr_add (dst, ((ch >> 12) & 0x3f) | 0x80); + Dynarr_add (dst, ((ch >> 6) & 0x3f) | 0x80); + Dynarr_add (dst, (ch & 0x3f) | 0x80); } - else if ( 
code <= 0x3ffffff ) + else if (ch <= 0x3ffffff) { - Dynarr_add (dst, (code >> 24) | 0xf8); - Dynarr_add (dst, ((code >> 18) & 0x3f) | 0x80); - Dynarr_add (dst, ((code >> 12) & 0x3f) | 0x80); - Dynarr_add (dst, ((code >> 6) & 0x3f) | 0x80); - Dynarr_add (dst, (code & 0x3f) | 0x80); + Dynarr_add (dst, (ch >> 24) | 0xf8); + Dynarr_add (dst, ((ch >> 18) & 0x3f) | 0x80); + Dynarr_add (dst, ((ch >> 12) & 0x3f) | 0x80); + Dynarr_add (dst, ((ch >> 6) & 0x3f) | 0x80); + Dynarr_add (dst, (ch & 0x3f) | 0x80); } else { - Dynarr_add (dst, (code >> 30) | 0xfc); - Dynarr_add (dst, ((code >> 24) & 0x3f) | 0x80); - Dynarr_add (dst, ((code >> 18) & 0x3f) | 0x80); - Dynarr_add (dst, ((code >> 12) & 0x3f) | 0x80); - Dynarr_add (dst, ((code >> 6) & 0x3f) | 0x80); - Dynarr_add (dst, (code & 0x3f) | 0x80); + Dynarr_add (dst, (ch >> 30) | 0xfc); + Dynarr_add (dst, ((ch >> 24) & 0x3f) | 0x80); + Dynarr_add (dst, ((ch >> 18) & 0x3f) | 0x80); + Dynarr_add (dst, ((ch >> 12) & 0x3f) | 0x80); + Dynarr_add (dst, ((ch >> 6) & 0x3f) | 0x80); + Dynarr_add (dst, (ch & 0x3f) | 0x80); } } @@ -5144,9 +4801,23 @@ decode_coding_iso2022 (Lstream *decoding, const Extbyte *src, charset = new_charset; } -#ifndef UTF2000 +#ifdef UTF2000 + if (XCHARSET_DIMENSION (charset) == 1) + { + DECODE_OUTPUT_PARTIAL_CHAR (ch); + DECODE_ADD_UCS_CHAR + (MAKE_CHAR (charset, c & 0x7F, 0), dst); + } + else if (ch) + { + DECODE_ADD_UCS_CHAR + (MAKE_CHAR (charset, ch & 0x7F, c & 0x7F), dst); + ch = 0; + } + else + ch = c; +#else lb = XCHARSET_LEADING_BYTE (charset); -#endif switch (XCHARSET_REP_BYTES (charset)) { case 1: /* ASCII */ @@ -5156,44 +4827,25 @@ decode_coding_iso2022 (Lstream *decoding, const Extbyte *src, case 2: /* one-byte official */ DECODE_OUTPUT_PARTIAL_CHAR (ch); -#ifdef UTF2000 - DECODE_ADD_UCS_CHAR(MAKE_CHAR(charset, c & 0x7F, 0), dst); -#else Dynarr_add (dst, lb); Dynarr_add (dst, c | 0x80); -#endif break; case 3: /* one-byte private or two-byte official */ -#ifdef UTF2000 - if (XCHARSET_DIMENSION (charset) 
== 1) -#else if (XCHARSET_PRIVATE_P (charset)) -#endif { DECODE_OUTPUT_PARTIAL_CHAR (ch); -#ifdef UTF2000 - DECODE_ADD_UCS_CHAR(MAKE_CHAR(charset, c & 0x7F, 0), - dst); -#else Dynarr_add (dst, PRE_LEADING_BYTE_PRIVATE_1); Dynarr_add (dst, lb); Dynarr_add (dst, c | 0x80); -#endif } else { if (ch) { -#ifdef UTF2000 - DECODE_ADD_UCS_CHAR(MAKE_CHAR(charset, - ch & 0x7F, - c & 0x7F), dst); -#else Dynarr_add (dst, lb); Dynarr_add (dst, ch | 0x80); Dynarr_add (dst, c | 0x80); -#endif ch = 0; } else @@ -5204,21 +4856,16 @@ decode_coding_iso2022 (Lstream *decoding, const Extbyte *src, default: /* two-byte private */ if (ch) { -#ifdef UTF2000 - DECODE_ADD_UCS_CHAR(MAKE_CHAR(charset, - ch & 0x7F, - c & 0x7F), dst); -#else Dynarr_add (dst, PRE_LEADING_BYTE_PRIVATE_2); Dynarr_add (dst, lb); Dynarr_add (dst, ch | 0x80); Dynarr_add (dst, c | 0x80); -#endif ch = 0; } else ch = c; } +#endif } if (!ch) @@ -5334,8 +4981,8 @@ char_encode_iso2022 (struct encoding_stream *str, Emchar ch, Lisp_Coding_System* codesys = str->codesys; eol_type_t eol_type = CODING_SYSTEM_EOL_TYPE (str->codesys); int i; - Lisp_Object charset; - int half; + Lisp_Object charset = str->iso2022.current_charset; + int half = str->iso2022.current_half; unsigned int byte1, byte2; if (ch <= 0x7F) @@ -5493,6 +5140,8 @@ char_encode_iso2022 (struct encoding_stream *str, Emchar ch, abort (); } } + str->iso2022.current_charset = charset; + str->iso2022.current_half = half; } void @@ -5561,80 +5210,54 @@ encode_coding_no_conversion (Lstream *encoding, const Bufbyte *src, { c = *src++; #ifdef UTF2000 - switch (char_boundary) + if (char_boundary == 0) + if ( c >= 0xfc ) + { + ch = c & 0x01; + char_boundary = 5; + } + else if ( c >= 0xf8 ) + { + ch = c & 0x03; + char_boundary = 4; + } + else if ( c >= 0xf0 ) + { + ch = c & 0x07; + char_boundary = 3; + } + else if ( c >= 0xe0 ) + { + ch = c & 0x0f; + char_boundary = 2; + } + else if ( c >= 0xc0 ) + { + ch = c & 0x1f; + char_boundary = 1; + } + else + { + ch = 0; + if (c == 
'\n') + { + if (eol_type != EOL_LF && eol_type != EOL_AUTODETECT) + Dynarr_add (dst, '\r'); + if (eol_type != EOL_CR) + Dynarr_add (dst, c); + } + else + Dynarr_add (dst, c); + char_boundary = 0; + } + else if (char_boundary == 1) { - case 0: - if ( c >= 0xfc ) - { - ch = c & 0x01; - char_boundary = 5; - } - else if ( c >= 0xf8 ) - { - ch = c & 0x03; - char_boundary = 4; - } - else if ( c >= 0xf0 ) - { - ch = c & 0x07; - char_boundary = 3; - } - else if ( c >= 0xe0 ) - { - ch = c & 0x0f; - char_boundary = 2; - } - else if ( c >= 0xc0 ) - { - ch = c & 0x1f; - char_boundary = 1; - } - else - { - ch = 0; - - if (c == '\n') - { - if (eol_type != EOL_LF && eol_type != EOL_AUTODETECT) - Dynarr_add (dst, '\r'); - if (eol_type != EOL_CR) - Dynarr_add (dst, c); - } - else - Dynarr_add (dst, c); - char_boundary = 0; - } - break; - case 1: ch = ( ch << 6 ) | ( c & 0x3f ); - switch ( str->codesys->fixed.size ) - { - case 1: - Dynarr_add (dst, ch & 0xff); - break; - case 2: - Dynarr_add (dst, (ch >> 8) & 0xff); - Dynarr_add (dst, ch & 0xff); - break; - case 3: - Dynarr_add (dst, (ch >> 16) & 0xff); - Dynarr_add (dst, (ch >> 8) & 0xff); - Dynarr_add (dst, ch & 0xff); - break; - case 4: - Dynarr_add (dst, (ch >> 24) & 0xff); - Dynarr_add (dst, (ch >> 16) & 0xff); - Dynarr_add (dst, (ch >> 8) & 0xff); - Dynarr_add (dst, ch & 0xff); - break; - default: - fprintf(stderr, "It seems %d bytes stream.\n", - str->codesys->fixed.size); - abort (); - } + Dynarr_add (dst, ch & 0xff); char_boundary = 0; - break; - default: + } + else + { ch = ( ch << 6 ) | ( c & 0x3f ); char_boundary--; } @@ -5731,10 +5354,6 @@ syms_of_file_coding (void) DEFSUBR (Fencode_shift_jis_char); DEFSUBR (Fdecode_big5_char); DEFSUBR (Fencode_big5_char); - DEFSUBR (Fset_ucs_char); - DEFSUBR (Fucs_char); - DEFSUBR (Fset_char_ucs); - DEFSUBR (Fchar_ucs); #endif /* MULE */ defsymbol (&Qcoding_systemp, "coding-system-p"); defsymbol (&Qno_conversion, "no-conversion"); @@ -5971,15 +5590,4 @@ complex_vars_of_file_coding 
(void) fcd->coding_category_system[CODING_CATEGORY_UTF8] = Fget_coding_system (Qutf8); #endif - -#if defined(MULE) && !defined(UTF2000) - { - size_t i; - - for (i = 0; i < countof (fcd->ucs_to_mule_table); i++) - fcd->ucs_to_mule_table[i] = Qnil; - } - staticpro (&mule_to_ucs_table); - mule_to_ucs_table = Fmake_char_table(Qgeneric); -#endif /* defined(MULE) && !defined(UTF2000) */ } -- 1.7.10.4