Basically sync with r21-2-19-utf-2000-0_7-0.

author tomo <tomo>

Mon, 16 Dec 2002 05:06:18 +0000 (05:06 +0000)

committer tomo <tomo>

Mon, 16 Dec 2002 05:06:18 +0000 (05:06 +0000)
author tomo <tomo>
Mon, 16 Dec 2002 05:06:18 +0000 (05:06 +0000)
committer tomo <tomo>
Mon, 16 Dec 2002 05:06:18 +0000 (05:06 +0000)
diff --git a/lisp/ChangeLog b/lisp/ChangeLog

index b78dd2f..84c6eb5 100644 (file)
--- a/lisp/ChangeLog
+++ b/lisp/ChangeLog
@@ -1,3 +1,18 @@
+1999-09-14  MORIOKA Tomohiko  <tomo@etl.go.jp>
+
+       * mule/mule-charset.el (default-coded-charset-priority-list): Add
+       thai-tis620 and hebrew-iso8859-8; namely all BMP are covered.
+
+1999-09-13  MORIOKA Tomohiko  <tomo@urania.m17n.org>
+
+       * mule/mule-charset.el (default-coded-charset-priority-list): Add
+       `cyrillic-iso8859-5' and `greek-iso8859-7'.
+
+1999-09-13  MORIOKA Tomohiko  <tomo@urania.m17n.org>
+
+       * mule/mule-charset.el (default-coded-charset-priority-list):
+       Setup in UTF-2000.
+
  1999-09-09  MORIOKA Tomohiko  <tomo@etl.go.jp>
  
         * mule/viet-chars.el: Don't define charset
diff --git a/lisp/custom-load.el b/lisp/custom-load.el

index 7f0c64b..c7bddd7 100644 (file)
--- a/lisp/custom-load.el
+++ b/lisp/custom-load.el
@@ -73,7 +73,7 @@
  (custom-add-loads 'fill '("simple" "fill"))
  (custom-add-loads 'custom-magic-faces '("cus-edit"))
  (custom-add-loads 'display '("modeline" "toolbar" "scrollbar" "auto-show"))
-(custom-add-loads 'faces '("faces" "font" "cus-edit" "font-lock" "hyper-apropos" "info" "wid-edit"))
+(custom-add-loads 'faces '("faces" "cus-edit" "font-lock" "font" "hyper-apropos" "info" "wid-edit"))
  (custom-add-loads 'emacs '("faces" "help" "files" "cus-edit" "package-get"))
  (custom-add-loads 'processes '("process" "gnuserv"))
  (custom-add-loads 'hyper-apropos '("hyper-apropos"))
diff --git a/lisp/mule/mule-charset.el b/lisp/mule/mule-charset.el

index 9dd8fef..bd61908 100644 (file)
--- a/lisp/mule/mule-charset.el
+++ b/lisp/mule/mule-charset.el
@@ -3,6 +3,8 @@
  ;; Copyright (C) 1992 Free Software Foundation, Inc.
  ;; Copyright (C) 1995 Amdahl Corporation.
  ;; Copyright (C) 1996 Sun Microsystems.
+;; Copyright (C) 1999 Electrotechnical Laboratory, JAPAN.
+;; Licensed to the Free Software Foundation.
  
  ;; Author: Unknown
  ;; Keywords: i18n, mule, internal
@@ -258,4 +260,25 @@ DESCRIPTION (string) is the description string of the charset."
      (put-char-table (car l) t auto-fill-chars)
      (setq l (cdr l))))
  
+
+;;; @ Coded character set
+;;;
+
+(when (featurep 'utf-2000)
+  (setq default-coded-charset-priority-list
+       '(ascii
+         control-1
+         latin-iso8859-1
+         latin-iso8859-2
+         latin-iso8859-3
+         latin-iso8859-4
+         latin-iso8859-9
+         latin-jisx0201
+         cyrillic-iso8859-5
+         greek-iso8859-7
+         thai-tis620
+         hebrew-iso8859-8
+         vietnamese-viscii-lower
+         vietnamese-viscii-upper)))
+
  ;;; mule-charset.el ends here
diff --git a/src/ChangeLog b/src/ChangeLog

index a41ff49..91959cb 100644 (file)
--- a/src/ChangeLog
+++ b/src/ChangeLog
@@ -1,3 +1,241 @@
+1999-09-14  MORIOKA Tomohiko  <tomo@etl.go.jp>
+
+       * char-ucs.h (breakup_char_1): Use
+       `Vdefault_coded_charset_priority_list' for hebrew-iso8859-8,
+       thai-tis620 and katakana-jisx0201 area.
+
+1999-09-13  MORIOKA Tomohiko  <tomo@urania.m17n.org>
+
+       * char-ucs.h (breakup_char_1): Use
+       `Vdefault_coded_charset_priority_list' for cyrillic-iso8859-5
+       area.
+
+       * text-coding.c (reset_encoding_stream): Fixed.
+       (char_encode_ucs4): Delete `& 255'.
+
+       * char-ucs.h (breakup_char_1): Use
+       `Vdefault_coded_charset_priority_list' for greek-iso8859-7 area.
+
+1999-09-13  MORIOKA Tomohiko  <tomo@urania.m17n.org>
+
+       * file-coding.c (Fmake_coding_system): Don't set up
+       `codesys->fixed.size'.
+       (encode_coding_no_conversion): Don't refer
+       `str->codesys->fixed.size'.
+
+1999-09-13  MORIOKA Tomohiko  <tomo@urania.m17n.org>
+
+       * mule-charset.c, char-ucs.h (latin_a_char_to_charset): Deleted.
+       (latin_a_char_to_byte1): Deleted.
+       (latin_a_char_to_byte2): Deleted.
+
+1999-09-13  MORIOKA Tomohiko  <tomo@urania.m17n.org>
+
+       * mule-charset.c (make_charset): Add new argument `ucs_min',
+       `ucs_max' and `code_offset'.
+       (charset_get_byte1): New implementation [delete charset-specific
+       implementations].
+       (Fmake_charset): Modify for `make_charset'.
+       (Fmake_reverse_direction_charset): Likewise.
+       (complex_vars_of_mule_charset): Likewise.
+
+       * char-ucs.h (struct Lisp_Charset): Add `ucs_min', `ucs_max' and
+       `code_offset'.
+       (CHARSET_UCS_MIN): New macro.
+       (CHARSET_UCS_MAX): New macro.
+       (CHARSET_CODE_OFFSET): New macro.
+       (MAKE_CHAR): Delete charset-dependent definitions [except
+       katakana-jisx0201].
+
+1999-09-13  MORIOKA Tomohiko  <tomo@etl.go.jp>
+
+       * char-ucs.h (breakup_char_1): Use
+       `Vdefault_coded_charset_priority_list' for C0-Controls,
+       Basic-Latin, C1-Controls and Latin-1-Supplement area.
+
+1999-09-13  MORIOKA Tomohiko  <tomo@urania.m17n.org>
+
+       * char-ucs.h (charset_get_byte1): New function.
+       (XCHARSET_GET_BYTE1): Deleted.
+       (charset_get_byte2): New function.
+       (XCHARSET_GET_BYTE2): Deleted.
+       (Vdefault_coded_charset_priority_list): New external variable.
+       (breakup_char_1): Use `charset_get_byte1', `charset_get_byte2' and
+       `Vdefault_coded_charset_priority_list'.
+
+       * mule-charset.c (charset_get_byte1): New function.
+       (charset_get_byte2): New function.
+       (Vdefault_coded_charset_priority_list): New variable.
+       (vars_of_mule_charset): Add new variable
+       `default-coded-charset-priority-list'.
+
+1999-09-12  MORIOKA Tomohiko  <tomo@urania.m17n.org>
+
+       * char-ucs.h (XCHARSET_GET_BYTE1): New inline function.
+       (XCHARSET_GET_BYTE2): New inline function.
+       (breakup_char_1): Use `XCHARSET_GET_BYTE1' and
+       `XCHARSET_GET_BYTE2'.
+
+1999-09-12  MORIOKA Tomohiko  <tomo@urania.m17n.org>
+
+       * mule-charset.c (make_charset): Initialize
+       `CHARSET_TO_BYTE1_TABLE(cs)' and `CHARSET_TO_BYTE2_TABLE(cs)' by
+       NULL if table is not defined.
+
+1999-09-11  MORIOKA Tomohiko  <tomo@urania.m17n.org>
+
+       * text-coding.c (char_encode_shift_jis): Use
+       `XCHARSET_TO_BYTE1_TABLE' for `Vcharset_latin_jisx0201' instead of
+       `ucs_to_latin_jisx0201'.
+
+       * mule-charset.c (ucs_to_latin_jisx0201): Deleted.
+       (ucs_to_latin_iso8859_2): Deleted.
+       (ucs_to_latin_iso8859_3): Deleted.
+       (ucs_to_latin_iso8859_4): Deleted.
+       (ucs_to_latin_iso8859_9): Deleted.
+       (ucs_to_latin_viscii_lower): Deleted.
+       (ucs_to_latin_viscii_upper): Deleted.
+       (ucs_to_latin_tcvn5712): Deleted.
+       (make_charset): Add new argument `decoding_table'; set up
+       `CHARSET_DECODING_TABLE(cs)' in UTF-2000; set up
+       `CHARSET_TO_BYTE1_TABLE(cs)' for 94-set and 96-set if
+       `decoding_table' is defined in UTF-2000.
+       (Fmake_charset): Modify for `make_charset'.
+       (Fmake_reverse_direction_charset): Likewise.
+       (complex_vars_of_mule_charset): Likewise; delete `GENERATE_94_SET'
+       and `GENERATE_96_SET'.
+
+       * char-ucs.h (latin_jisx0201_to_ucs): Deleted.
+       (ucs_to_latin_jisx0201): Deleted.
+       (latin_iso8859_2_to_ucs): Deleted.
+       (ucs_to_latin_iso8859_2): Deleted.
+       (latin_iso8859_3_to_ucs): Deleted.
+       (ucs_to_latin_iso8859_3): Deleted.
+       (latin_iso8859_4_to_ucs): Deleted.
+       (ucs_to_latin_iso8859_4): Deleted.
+       (latin_iso8859_9_to_ucs): Deleted.
+       (ucs_to_latin_iso8859_9): Deleted.
+       (latin_viscii_lower_to_ucs): Deleted.
+       (ucs_to_latin_viscii_lower): Deleted.
+       (latin_viscii_upper_to_ucs): Deleted.
+       (ucs_to_latin_viscii_upper): Deleted.
+       (struct Lisp_Charset): Renamed `encoding_table' to
+       `to_byte1_table'; add `to_byte2_table'.
+       (CHARSET_DECODING_TABLE): New macro.
+       (CHARSET_TO_BYTE1_TABLE): New macro.
+       (CHARSET_TO_BYTE2_TABLE): New macro.
+       (XCHARSET_DECODING_TABLE): New macro.
+       (XCHARSET_TO_BYTE1_TABLE): New macro.
+       (XCHARSET_TO_BYTE2_TABLE): New macro.
+       (MAKE_CHAR): Use `XCHARSET_DECODING_TABLE'; don't use `*_to_ucs'
+       tables.
+       (breakup_char_1): Use `XCHARSET_TO_BYTE1_TABLE' if it is defined;
+       don't use `ucs_to_*' tables.
+
+1999-09-11  MORIOKA Tomohiko  <tomo@urania.m17n.org>
+
+       * text-coding.c (Fmake_coding_system): Don't set up
+       `codesys->fixed.size'.
+       (encode_coding_no_conversion): Use `if' instead of `switch'.
+
+       * file-coding.h (struct Lisp_Coding_System): Delete `fixed.size'.
+
+1999-09-11  MORIOKA Tomohiko  <tomo@etl.go.jp>
+
+       * mule-charset.c (make_charset): Delete argument `rep_bytes'.
+       (Fmake_charset): Modify for `make_charset'.
+       (Fmake_reverse_direction_charset): Likewise.
+       (complex_vars_of_mule_charset): Likewise.
+
+1999-09-11  MORIOKA Tomohiko  <tomo@etl.go.jp>
+
+       * text-coding.c (char_encode_shift_jis): Use table
+       `ucs_to_latin_jisx0201' and BREAKUP_CHAR.
+
+1999-09-11  MORIOKA Tomohiko  <tomo@etl.go.jp>
+
+       * text-coding.c (text_encode_generic): Use `if' instead of
+       `switch'.
+       (decode_coding_sjis): Use `MAKE_CHAR' and `DECODE_ADD_UCS_CHAR' to
+       decode JIS-Latin.
+
+1999-09-10  MORIOKA Tomohiko  <tomo@etl.go.jp>
+
+       * text-coding.c (encode_coding_sjis): Deleted.
+       (char_encode_shift_jis): New function.
+       (char_finish_shift_jis): New function.
+       (reset_encoding_stream): Set up `encode_char' and `finish' for
+       `CODESYS_UCS4' and `CODESYS_SHIFT_JIS'.
+       (mule_encode): Use generic encoder for `CODESYS_SHIFT_JIS'.
+       (char_encode_utf8): Treat `eol_type'.
+
+1999-09-10  MORIOKA Tomohiko  <tomo@etl.go.jp>
+
+       * file-coding.c (decode_coding_iso2022): Use
+       `DECODE_ADD_UCS_CHAR'; don't use `XCHARSET_REP_BYTES'.
+
+1999-09-10  MORIOKA Tomohiko  <tomo@etl.go.jp>
+
+       * mule-charset.c (vars_of_mule_charset): Update `utf-2000-version'
+       to 0.7 (Hirano).
+
+1999-09-10  MORIOKA Tomohiko  <tomo@etl.go.jp>
+
+       * char-lb.h (CHAR_COLUMNS): New macro.
+
+1999-09-09  MORIOKA Tomohiko  <tomo@etl.go.jp>
+
+       * text-coding.c (char_encode_ucs4): New function.
+       (char_finish_ucs4): New function.
+       (encode_coding_ucs4): Deleted.
+       (mule_encode): Use generic encoder for `CODESYS_UCS4'.
+       (text_encode_generic): Delete local variable `charset' and `half'.
+       (ucs_to_mule_table): Deleted.
+       (mule_to_ucs_table): Deleted.
+       (Fset_ucs_char): Deleted.
+       (ucs_to_char): Deleted.
+       (Fucs_char): Deleted.
+       (Fset_char_ucs): Deleted.
+       (Fchar_ucs): Deleted.
+       (decode_ucs4): Deleted.
+       (mule_char_to_ucs4): Deleted.
+       (encode_ucs4): Deleted.
+       (decode_coding_ucs4): Use `DECODE_ADD_UCS_CHAR'.
+       (decode_coding_utf8): Likewise.
+       (decode_coding_iso2022): Likewise; don't use `XCHARSET_REP_BYTES'.
+       (char_encode_iso2022): Fixed.
+       (syms_of_file_coding): Delete `Fset_ucs_char', `Fucs_char',
+       `Fset_char_ucs' and `Fchar_ucs'.
+       (complex_vars_of_file_coding): Don't initialize
+       `ucs_to_mule_table'.
+
+       * objects-tty.c (tty_initialize_font_instance): Don't use
+       `XCHARSET_COLUMNS'.
+
+       * mule-charset.c (make_charset): Don't set up CHARSET_REP_BYTES in
+       UTF-2000.
+
+       * redisplay-tty.c (tty_output_display_block): Use `CHAR_COLUMNS'
+       instead of `XCHARSET_COLUMNS' and `CHAR_CHARSET'.
+
+       * insdel.c (bufbyte_string_displayed_columns): Use `CHAR_COLUMNS'
+       instead of `XCHARSET_COLUMNS' and `CHAR_CHARSET'.
+       (emchar_string_displayed_columns): Likewise.
+
+       * indent.c (column_at_point): Use `CHAR_COLUMNS' instead of
+       `XCHARSET_COLUMNS' and `CHAR_CHARSET'.
+       (string_column_at_point): Likewise.
+       (Fmove_to_column): Likewise.
+
+       * char-ucs.h (struct Lisp_Charset): Delete `rep_bytes'; add
+       `encoding_table' and `decoding_table'.
+       (CHARSET_REP_BYTES): Deleted.
+       (XCHARSET_REP_BYTES): Deleted.
+       (XCHARSET_COLUMNS): Deleted.
+       (CHAR_COLUMNS): New macro.
+       (lookup_composite_char): Deleted unconditionally.
+       (composite_char_string): Likewise.
+
  1999-09-09  MORIOKA Tomohiko  <tomo@etl.go.jp>
  
         * char-ucs.h (Emchar_to_byte_table): New type.
diff --git a/src/char-lb.h b/src/char-lb.h

index a9bbce9..01ec5d5 100644 (file)
--- a/src/char-lb.h
+++ b/src/char-lb.h
@@ -35,4 +35,6 @@ valid_char_p (Emchar ch)
    return ((unsigned int) (ch) <= 0xff) || non_ascii_valid_char_p (ch);
  }
  
+#define CHAR_COLUMNS(c)     (XCHARSET_COLUMNS(CHAR_CHARSET(c)))
+
  #endif /* _XEMACS_CHAR_LB_H */
diff --git a/src/char-ucs.h b/src/char-ucs.h

index 9482331..0418aea 100644 (file)
--- a/src/char-ucs.h
+++ b/src/char-ucs.h
@@ -29,32 +29,17 @@ Boston, MA 02111-1307, USA.  */
  #define CHAR_ASCII_P(ch) ((ch) <= 0x7F)
  
  \f
-unsigned char
-get_byte_from_character_table (Emchar ch,
-                              Emchar* table, size_t size, unsigned char offset);
+int
+get_byte_from_character_table (Emchar ch, Lisp_Object ccs);
  
  extern Lisp_Object Vcharset_ucs_bmp;
-
  extern Lisp_Object Vcharset_latin_jisx0201;
-extern Emchar latin_jisx0201_to_ucs[94];
-
  extern Lisp_Object Vcharset_latin_iso8859_2;
-extern Emchar latin_iso8859_2_to_ucs[96];
-
  extern Lisp_Object Vcharset_latin_iso8859_3;
-extern Emchar latin_iso8859_3_to_ucs[96];
-
  extern Lisp_Object Vcharset_latin_iso8859_4;
-extern Emchar latin_iso8859_4_to_ucs[96];
-
  extern Lisp_Object Vcharset_latin_iso8859_9;
-extern Emchar latin_iso8859_9_to_ucs[96];
-
  extern Lisp_Object Vcharset_latin_viscii_lower;
-extern Emchar latin_viscii_lower_to_ucs[96];
-
  extern Lisp_Object Vcharset_latin_viscii_upper;
-extern Emchar latin_viscii_upper_to_ucs[96];
  
  \f
  /************************************************************************/
@@ -206,11 +191,6 @@ struct Lisp_Charset
    /* Final byte of this character set in ISO2022 designating escape sequence */
    Bufbyte final;
  
-  /* Number of bytes (1 - 4) required in the internal representation
-     for characters in this character set.  This is *not* the
-     same as the dimension of the character set). */
-  unsigned int rep_bytes;
-
    /* Number of columns a character in this charset takes up, on TTY
       devices.  Not used for X devices. */
    unsigned int columns;
@@ -229,6 +209,15 @@ struct Lisp_Charset
  
    /* Which half of font to be used to display this character set */
    unsigned int graphic;
+
+  /* Byte->character mapping table */
+  Emchar* decoding_table;
+
+  /* Range of character code */
+  Emchar ucs_min, ucs_max;
+
+  /* Offset for external representation */
+  Emchar code_offset;
  };
  typedef struct Lisp_Charset Lisp_Charset;
  
@@ -255,7 +244,6 @@ DECLARE_LRECORD (charset, Lisp_Charset);
  #define CHARSET_NAME(cs)        ((cs)->name)
  #define CHARSET_SHORT_NAME(cs)  ((cs)->short_name)
  #define CHARSET_LONG_NAME(cs)   ((cs)->long_name)
-#define CHARSET_REP_BYTES(cs)   ((cs)->rep_bytes)
  #define CHARSET_COLUMNS(cs)     ((cs)->columns)
  #define CHARSET_GRAPHIC(cs)     ((cs)->graphic)
  #define CHARSET_TYPE(cs)        ((cs)->type)
@@ -267,14 +255,16 @@ DECLARE_LRECORD (charset, Lisp_Charset);
  #define CHARSET_DIMENSION(cs)   ((cs)->dimension)
  #define CHARSET_CHARS(cs)       ((cs)->chars)
  #define CHARSET_REVERSE_DIRECTION_CHARSET(cs) ((cs)->reverse_direction_charset)
+#define CHARSET_DECODING_TABLE(cs) ((cs)->decoding_table)
+#define CHARSET_UCS_MIN(cs)     ((cs)->ucs_min)
+#define CHARSET_UCS_MAX(cs)     ((cs)->ucs_max)
+#define CHARSET_CODE_OFFSET(cs)         ((cs)->code_offset)
  
  
  #define XCHARSET_ID(cs)                  CHARSET_ID           (XCHARSET (cs))
  #define XCHARSET_NAME(cs)        CHARSET_NAME         (XCHARSET (cs))
  #define XCHARSET_SHORT_NAME(cs)          CHARSET_SHORT_NAME   (XCHARSET (cs))
  #define XCHARSET_LONG_NAME(cs)   CHARSET_LONG_NAME    (XCHARSET (cs))
-#define XCHARSET_REP_BYTES(cs)   CHARSET_REP_BYTES    (XCHARSET (cs))
-#define XCHARSET_COLUMNS(cs)     CHARSET_COLUMNS      (XCHARSET (cs))
  #define XCHARSET_GRAPHIC(cs)      CHARSET_GRAPHIC      (XCHARSET (cs))
  #define XCHARSET_TYPE(cs)        CHARSET_TYPE         (XCHARSET (cs))
  #define XCHARSET_DIRECTION(cs)   CHARSET_DIRECTION    (XCHARSET (cs))
@@ -287,6 +277,7 @@ DECLARE_LRECORD (charset, Lisp_Charset);
  #define XCHARSET_CHARS(cs)       CHARSET_CHARS        (XCHARSET (cs))
  #define XCHARSET_REVERSE_DIRECTION_CHARSET(cs) \
    CHARSET_REVERSE_DIRECTION_CHARSET (XCHARSET (cs))
+#define XCHARSET_DECODING_TABLE(cs) CHARSET_DECODING_TABLE(XCHARSET(cs))
  
  struct charset_lookup {
    /* Table of charsets indexed by (leading byte - MIN_LEADING_BYTE). */
@@ -369,41 +360,19 @@ INLINE_HEADER Emchar MAKE_CHAR (Lisp_Object charset, int c1, int c2);
  INLINE_HEADER Emchar
  MAKE_CHAR (Lisp_Object charset, int c1, int c2)
  {
-  if (EQ (charset, Vcharset_ascii))
-    return c1;
-  else if (EQ (charset, Vcharset_control_1))
-    return c1 | 0x80;
-  else if (EQ (charset, Vcharset_ucs_bmp))
-    return (c1 << 8) | c2;
-  else if (EQ (charset, Vcharset_latin_iso8859_1))
-    return c1 | 0x80;
-  else if (EQ (charset, Vcharset_latin_iso8859_2))
-    return latin_iso8859_2_to_ucs[c1 - 32];
-  else if (EQ (charset, Vcharset_latin_iso8859_3))
-    return latin_iso8859_3_to_ucs[c1 - 32];
-  else if (EQ (charset, Vcharset_latin_iso8859_4))
-    return latin_iso8859_4_to_ucs[c1 - 32];
-  else if (EQ (charset, Vcharset_cyrillic_iso8859_5))
-    return c1 + MIN_CHAR_CYRILLIC - 0x20;
-  else if (EQ (charset, Vcharset_greek_iso8859_7))
-    return c1 + MIN_CHAR_GREEK - 0x20;
-  else if (EQ (charset, Vcharset_hebrew_iso8859_8))
-    return c1 + MIN_CHAR_HEBREW - 0x20;
-  else if (EQ (charset, Vcharset_latin_iso8859_9))
-    return latin_iso8859_9_to_ucs[c1 - 32];
-  else if (EQ (charset, Vcharset_thai_tis620))
-    return c1 + MIN_CHAR_THAI - 0x20;
+  Emchar* decoding_table;
+  
+  if ((decoding_table = XCHARSET_DECODING_TABLE (charset)) != NULL)
+    return decoding_table[c1 - (XCHARSET_CHARS (charset) == 94 ? 33 : 32)];
    else if (EQ (charset, Vcharset_katakana_jisx0201))
      if (c1 < 0x60)
        return c1 + MIN_CHAR_HALFWIDTH_KATAKANA - 0x20;
      else
-      return 32;
-  else if (EQ (charset, Vcharset_latin_jisx0201))
-    return latin_jisx0201_to_ucs[c1 - 33];
-  else if (EQ (charset, Vcharset_latin_viscii_lower))
-    return latin_viscii_lower_to_ucs[c1 - 32];
-  else if (EQ (charset, Vcharset_latin_viscii_upper))
-    return latin_viscii_upper_to_ucs[c1 - 32];
+      /* return MIN_CHAR_94 + ('I' - '0') * 94 + (c1 - 33); */
+      return ' ';
+  else if (CHARSET_UCS_MAX (XCHARSET (charset)))
+    return c1 - CHARSET_CODE_OFFSET (XCHARSET (charset))
+      + CHARSET_UCS_MIN (XCHARSET (charset));
    else if (XCHARSET_DIMENSION (charset) == 1)
      {
        switch (XCHARSET_CHARS (charset))
@@ -436,175 +405,78 @@ MAKE_CHAR (Lisp_Object charset, int c1, int c2)
      }
  }
  
-extern Charset_ID    latin_a_char_to_charset[128];
-extern unsigned char latin_a_char_to_byte1[128];
-extern unsigned char latin_a_char_to_byte2[128];
+unsigned char charset_get_byte1 (Lisp_Object charset, Emchar ch);
+unsigned char charset_get_byte2 (Lisp_Object charset, Emchar ch);
+
+extern Lisp_Object Vdefault_coded_charset_priority_list;
+EXFUN (Ffind_charset, 1);
  
  INLINE_HEADER void breakup_char_1 (Emchar c, Lisp_Object *charset, int *c1, int *c2);
  INLINE_HEADER void
  breakup_char_1 (Emchar c, Lisp_Object *charset, int *c1, int *c2)
  {
-  if (c <= MAX_CHAR_BASIC_LATIN)
-    {
-      *charset = Vcharset_ascii;
-      *c1 = c;
-      *c2 = 0;
-    }
-  else if (c < 0xA0)
-    {
-      *charset = Vcharset_control_1;
-      *c1 = c & 0x7f;
-      *c2 = 0;
-    }
-  else if (c <= 0xff)
-    {
-      *charset = Vcharset_latin_iso8859_1;
-      *c1 = c & 0x7f;
-      *c2 = 0;
-    }
-  else if (c <= 0x17f)
-    {
-      *charset
-       = CHARSET_BY_LEADING_BYTE (latin_a_char_to_charset[c - 0x100]);
-      
-      if (EQ (*charset, Vcharset_latin_iso8859_2))
-       {
-         *c1 = get_byte_from_character_table (c, latin_iso8859_2_to_ucs, 96, 32);
-         *c2 = 0;
-       }
-      else if (EQ (*charset, Vcharset_latin_iso8859_3))
-       {
-         *c1 = get_byte_from_character_table (c, latin_iso8859_3_to_ucs, 96, 32);
-         *c2 = 0;
-       }
-      else if (EQ (*charset, Vcharset_latin_iso8859_4))
-       {
-         *c1 = get_byte_from_character_table (c, latin_iso8859_4_to_ucs, 96, 32);
-         *c2 = 0;
-       }
-      else if (EQ (*charset, Vcharset_latin_iso8859_9))
-       {
-         *c1 = get_byte_from_character_table (c, latin_iso8859_9_to_ucs, 96, 32);
-         *c2 = 0;
-       }
-      else
-       {
-         *c1 = latin_a_char_to_byte1[c - 0x100];
-         *c2 = latin_a_char_to_byte2[c - 0x100];
-       }
-    }
-  else if (c < MIN_CHAR_GREEK)
+  if (c < MIN_CHAR_94)
      {
-      if ( (*c1 = get_byte_from_character_table (c, latin_iso8859_2_to_ucs,
-                                                96, 32)) )
+      Lisp_Object charsets = Vdefault_coded_charset_priority_list;
+      while (!EQ (charsets, Qnil))
         {
-         *charset = Vcharset_latin_iso8859_2;
-         *c2 = 0;
+         *charset = Ffind_charset (Fcar (charsets));
+         if (!EQ (*charset, Qnil)
+             && (*c1 = charset_get_byte1 (*charset, c)) )
+           {
+             *c2 = charset_get_byte2 (*charset, c);
+             return;
+           }
+         charsets = Fcdr (charsets);         
         }
-      else if ( (*c1 =
-                get_byte_from_character_table (c, latin_iso8859_3_to_ucs,
-                                               96, 32)) )
+      /* otherwise --- maybe for bootstrap */
+      if (c <= MAX_CHAR_BASIC_LATIN)
         {
-         *charset = Vcharset_latin_iso8859_3;
-         *c2 = 0;
+         *charset = Vcharset_ascii;
+         *c1 = charset_get_byte1 (*charset, c);
+         *c2 = charset_get_byte2 (*charset, c);
         }
-      else if ( (*c1 =
-                get_byte_from_character_table (c, latin_iso8859_4_to_ucs,
-                                               96, 32)) )
+      else if (c < 0xA0)
         {
-         *charset = Vcharset_latin_iso8859_4;
-         *c2 = 0;
+         *charset = Vcharset_control_1;
+         *c1 = charset_get_byte1 (*charset, c);
+         *c2 = charset_get_byte2 (*charset, c);
         }
-      else if ( (*c1 =
-                get_byte_from_character_table (c, latin_iso8859_9_to_ucs,
-                                               96, 32)) )
+      else if (c <= 0xff)
         {
-         *charset = Vcharset_latin_iso8859_9;
-         *c2 = 0;
+         *charset = Vcharset_latin_iso8859_1;
+         *c1 = charset_get_byte1 (*charset, c);
+         *c2 = charset_get_byte2 (*charset, c);
         }
-      else if ( (*c1 =
-                get_byte_from_character_table (c, latin_viscii_lower_to_ucs,
-                                               96, 32)) )
+      else if ((MIN_CHAR_GREEK <= c) && (c <= MAX_CHAR_GREEK))
         {
-         *charset = Vcharset_latin_viscii_lower;
+         *charset = Vcharset_greek_iso8859_7;
+         *c1 = c - MIN_CHAR_GREEK + 0x20;
           *c2 = 0;
         }
-      else if ( (*c1 =
-                get_byte_from_character_table (c, latin_viscii_upper_to_ucs,
-                                               96, 32)) )
+      else if ((MIN_CHAR_CYRILLIC <= c) && (c <= MAX_CHAR_CYRILLIC))
         {
-         *charset = Vcharset_latin_viscii_upper;
+         *charset = Vcharset_cyrillic_iso8859_5;
+         *c1 = c - MIN_CHAR_CYRILLIC + 0x20;
           *c2 = 0;
         }
-      else
+      else if ((MIN_CHAR_HEBREW <= c) && (c <= MAX_CHAR_HEBREW))
         {
-         *charset = Vcharset_ucs_bmp;
-         *c1 = c >> 8;
-         *c2 = c & 0xff;
-       }
-    }
-  else if (c <= MAX_CHAR_GREEK)
-    {
-      *charset = Vcharset_greek_iso8859_7;
-      *c1 = c - MIN_CHAR_GREEK + 0x20;
-      *c2 = 0;
-    }
-  else if (c < MIN_CHAR_CYRILLIC)
-    {
-      *charset = Vcharset_ucs_bmp;
-      *c1 = c >> 8;
-      *c2 = c & 0xff;
-    }
-  else if (c <= MAX_CHAR_CYRILLIC)
-    {
-      *charset = Vcharset_cyrillic_iso8859_5;
-      *c1 = c - MIN_CHAR_CYRILLIC + 0x20;
-      *c2 = 0;
-    }
-  else if (c < MIN_CHAR_HEBREW)
-    {
-      *charset = Vcharset_ucs_bmp;
-      *c1 = c >> 8;
-      *c2 = c & 0xff;
-    }
-  else if (c <= MAX_CHAR_HEBREW)
-    {
-      *charset = Vcharset_hebrew_iso8859_8;
-      *c1 = c - MIN_CHAR_HEBREW + 0x20;
-      *c2 = 0;
-    }
-  else if (c < MIN_CHAR_THAI)
-    {
-      *charset = Vcharset_ucs_bmp;
-      *c1 = c >> 8;
-      *c2 = c & 0xff;
-    }
-  else if (c <= MAX_CHAR_THAI)
-    {
-      *charset = Vcharset_thai_tis620;
-      *c1 = c - MIN_CHAR_THAI + 0x20;
-      *c2 = 0;
-    }
-  else if (c < MIN_CHAR_HALFWIDTH_KATAKANA)
-    {
-      if ( (*c1 = get_byte_from_character_table (c, latin_jisx0201_to_ucs,
-                                                94, 33)) )
-       {
-         *charset = Vcharset_latin_jisx0201;
+         *charset = Vcharset_hebrew_iso8859_8;
+         *c1 = c - MIN_CHAR_HEBREW + 0x20;
           *c2 = 0;
         }
-      else if ( (*c1 = get_byte_from_character_table (c,
-                                                     latin_viscii_lower_to_ucs,
-                                                     96, 32)) )
+      else if ((MIN_CHAR_THAI <= c) && (c <= MAX_CHAR_THAI))
         {
-         *charset = Vcharset_latin_viscii_lower;
+         *charset = Vcharset_thai_tis620;
+         *c1 = c - MIN_CHAR_THAI + 0x20;
           *c2 = 0;
         }
-      else if ( (*c1 = get_byte_from_character_table (c,
-                                                     latin_viscii_upper_to_ucs,
-                                                     96, 32)) )
+      else if ((MIN_CHAR_HALFWIDTH_KATAKANA <= c)
+              && (c <= MAX_CHAR_HALFWIDTH_KATAKANA))
         {
-         *charset = Vcharset_latin_viscii_upper;
+         *charset = Vcharset_katakana_jisx0201;
+         *c1 = c - MIN_CHAR_HALFWIDTH_KATAKANA + 0x20;
           *c2 = 0;
         }
        else
@@ -614,12 +486,6 @@ breakup_char_1 (Emchar c, Lisp_Object *charset, int *c1, int *c2)
           *c2 = c & 0xff;
         }
      }
-  else if (c <= MAX_CHAR_HALFWIDTH_KATAKANA)
-    {
-      *charset = Vcharset_katakana_jisx0201;
-      *c1 = c - MIN_CHAR_HALFWIDTH_KATAKANA + 0x20;
-      *c2 = 0;
-    }
    else if (c <= MAX_CHAR_94)
      {
        *charset
@@ -679,22 +545,13 @@ CHAR_CHARSET (Emchar ch)
  
  #define CHAR_LEADING_BYTE(c) (XCHARSET_LEADING_BYTE(CHAR_CHARSET(c)))
  
-\f
-#ifdef ENABLE_COMPOSITE_CHARS
-/************************************************************************/
-/*                           Composite characters                       */
-/************************************************************************/
-
-Emchar lookup_composite_char (Bufbyte *str, int len);
-Lisp_Object composite_char_string (Emchar ch);
-#endif /* ENABLE_COMPOSITE_CHARS */
+#define CHAR_COLUMNS(c)     (CHARSET_COLUMNS(XCHARSET(CHAR_CHARSET(c))))
  
  \f
  /************************************************************************/
  /*                            Exported functions                        */
  /************************************************************************/
  
-EXFUN (Ffind_charset, 1);
  EXFUN (Fget_charset, 1);
  
  extern Lisp_Object Vcharset_chinese_big5_1;
diff --git a/src/depend b/src/depend

index 96f333d..1121482 100644 (file)
--- a/src/depend
+++ b/src/depend
@@ -216,6 +216,7 @@ sysdll.o: config.h sysdll.h
  termcap.o: $(LISP_H) conslots.h console.h device.h
  terminfo.o: config.h
  tests.o: $(LISP_H) buffer.h bufslots.h casetab.h char-1byte.h char-lb.h char-ucs.h character.h chartab.h lstream.h mb-1byte.h mb-lb.h mb-multibyte.h mb-utf-8.h mule-charset.h multibyte.h opaque.h
+text-coding.o: $(LISP_H) buffer.h bufslots.h casetab.h char-1byte.h char-lb.h char-ucs.h character.h chartab.h elhash.h file-coding.h insdel.h lstream.h mb-1byte.h mb-lb.h mb-multibyte.h mb-utf-8.h mule-ccl.h mule-charset.h multibyte.h opaque.h
  toolbar.o: $(LISP_H) buffer.h bufslots.h casetab.h char-1byte.h char-lb.h char-ucs.h character.h chartab.h conslots.h console.h device.h frame.h frameslots.h glyphs.h gui.h mb-1byte.h mb-lb.h mb-multibyte.h mb-utf-8.h mule-charset.h multibyte.h redisplay.h scrollbar.h specifier.h toolbar.h window.h winslots.h
  tooltalk.o: $(LISP_H) buffer.h bufslots.h casetab.h char-1byte.h char-lb.h char-ucs.h character.h chartab.h elhash.h mb-1byte.h mb-lb.h mb-multibyte.h mb-utf-8.h mule-charset.h multibyte.h process.h syssignal.h tooltalk.h
  tparam.o: config.h
diff --git a/src/file-coding.c b/src/file-coding.c

index 9774267..fdf4a31 100644 (file)
--- a/src/file-coding.c
+++ b/src/file-coding.c
@@ -892,10 +892,6 @@ if TYPE is 'ccl:
      CHECK_STRING (doc_string);
    CODING_SYSTEM_DOC_STRING (codesys) = doc_string;
  
-#ifdef UTF2000
-  if (ty == CODESYS_NO_CONVERSION)
-    codesys->fixed.size = 1;
-#endif
    {
      EXTERNAL_PROPERTY_LIST_LOOP_3 (key, value, props)
        {
@@ -5290,9 +5286,23 @@ decode_coding_iso2022 (Lstream *decoding, const Extbyte *src,
                     charset = new_charset;
                 }
  
-#ifndef UTF2000
+#ifdef UTF2000
+             if (XCHARSET_DIMENSION (charset) == 1)
+               {
+                 DECODE_OUTPUT_PARTIAL_CHAR (ch);
+                 DECODE_ADD_UCS_CHAR
+                   (MAKE_CHAR (charset, c & 0x7F, 0), dst);
+               }
+             else if (ch)
+               {
+                 DECODE_ADD_UCS_CHAR
+                   (MAKE_CHAR (charset, ch & 0x7F, c & 0x7F), dst);
+                 ch = 0;
+               }
+             else
+               ch = c;
+#else
               lb = XCHARSET_LEADING_BYTE (charset);
-#endif
               switch (XCHARSET_REP_BYTES (charset))
                 {
                 case 1: /* ASCII */
@@ -5302,44 +5312,25 @@ decode_coding_iso2022 (Lstream *decoding, const Extbyte *src,
  
                 case 2: /* one-byte official */
                   DECODE_OUTPUT_PARTIAL_CHAR (ch);
-#ifdef UTF2000
-                 DECODE_ADD_UCS_CHAR(MAKE_CHAR(charset, c & 0x7F, 0), dst);
-#else
                   Dynarr_add (dst, lb);
                   Dynarr_add (dst, c | 0x80);
-#endif
                   break;
  
                 case 3: /* one-byte private or two-byte official */
-#ifdef UTF2000
-                 if (XCHARSET_DIMENSION (charset) == 1)
-#else
                   if (XCHARSET_PRIVATE_P (charset))
-#endif
                     {
                       DECODE_OUTPUT_PARTIAL_CHAR (ch);
-#ifdef UTF2000
-                     DECODE_ADD_UCS_CHAR(MAKE_CHAR(charset, c & 0x7F, 0),
-                                         dst);
-#else
                       Dynarr_add (dst, PRE_LEADING_BYTE_PRIVATE_1);
                       Dynarr_add (dst, lb);
                       Dynarr_add (dst, c | 0x80);
-#endif
                     }
                   else
                     {
                       if (ch)
                         {
-#ifdef UTF2000
-                         DECODE_ADD_UCS_CHAR(MAKE_CHAR(charset,
-                                                       ch & 0x7F,
-                                                       c & 0x7F), dst);
-#else
                           Dynarr_add (dst, lb);
                           Dynarr_add (dst, ch | 0x80);
                           Dynarr_add (dst, c | 0x80);
-#endif
                           ch = 0;
                         }
                       else
@@ -5350,21 +5341,16 @@ decode_coding_iso2022 (Lstream *decoding, const Extbyte *src,
                 default:        /* two-byte private */
                   if (ch)
                     {
-#ifdef UTF2000
-                     DECODE_ADD_UCS_CHAR(MAKE_CHAR(charset,
-                                                   ch & 0x7F,
-                                                   c & 0x7F), dst);
-#else
                       Dynarr_add (dst, PRE_LEADING_BYTE_PRIVATE_2);
                       Dynarr_add (dst, lb);
                       Dynarr_add (dst, ch | 0x80);
                       Dynarr_add (dst, c | 0x80);
-#endif
                       ch = 0;
                     }
                   else
                     ch = c;
                 }
+#endif
             }
  
           if (!ch)
@@ -6088,31 +6074,7 @@ encode_coding_no_conversion (Lstream *encoding, const Bufbyte *src,
           break;
         case 1:
           ch = ( ch << 6 ) | ( c & 0x3f );
-         switch ( str->codesys->fixed.size )
-           {
-           case 1:
-             Dynarr_add (dst, ch & 0xff);
-             break;
-           case 2:
-             Dynarr_add (dst, (ch >> 8) & 0xff);
-             Dynarr_add (dst,  ch       & 0xff);
-             break;
-           case 3:
-             Dynarr_add (dst, (ch >> 16) & 0xff);
-             Dynarr_add (dst, (ch >>  8) & 0xff);
-             Dynarr_add (dst,  ch        & 0xff);
-             break;
-           case 4:
-             Dynarr_add (dst, (ch >> 24) & 0xff);
-             Dynarr_add (dst, (ch >> 16) & 0xff);
-             Dynarr_add (dst, (ch >>  8) & 0xff);
-             Dynarr_add (dst,  ch        & 0xff);
-             break;
-           default:
-             fprintf(stderr, "It seems %d bytes stream.\n",
-                     str->codesys->fixed.size);
-             abort ();
-           }
+         Dynarr_add (dst, ch & 0xff);
           char_boundary = 0;
           break;
         default:
diff --git a/src/file-coding.h b/src/file-coding.h

index 895093a..322dbec 100644 (file)
--- a/src/file-coding.h
+++ b/src/file-coding.h
@@ -132,10 +132,6 @@ struct Lisp_Coding_System
    } iso2022;
    struct
    {
-    unsigned char size;
-  } fixed;
-  struct
-  {
      /* For a CCL coding system, these specify the CCL programs used for
         decoding (input) and encoding (output). */
      Lisp_Object decode;
diff --git a/src/indent.c b/src/indent.c

index 0ef743f..9ecf75b 100644 (file)
--- a/src/indent.c
+++ b/src/indent.c
@@ -169,7 +169,7 @@ column_at_point (struct buffer *buf, Bufpos init_pos, int cur_col)
                      + displayed_glyphs->end_columns));
  #else /* XEmacs */
  #ifdef MULE
-         col += XCHARSET_COLUMNS (CHAR_CHARSET (c));
+         col += CHAR_COLUMNS (c);
  #else
           col ++;
  #endif /* MULE */
@@ -226,7 +226,7 @@ string_column_at_point (Lisp_String* s, Bufpos init_pos, int tab_width)
         break;
        else
  #ifdef MULE
-         col += XCHARSET_COLUMNS (CHAR_CHARSET (c));
+         col += CHAR_COLUMNS (c);
  #else
           col ++;
  #endif /* MULE */
@@ -456,7 +456,7 @@ Returns the actual column that it moved to.
                      + displayed_glyphs->end_columns));
  #else /* XEmacs */
  #ifdef MULE
-         col += XCHARSET_COLUMNS (CHAR_CHARSET (c));
+         col += CHAR_COLUMNS (c);
  #else
           col ++;
  #endif /* MULE */
diff --git a/src/insdel.c b/src/insdel.c

index 2530222..046697c 100644 (file)
--- a/src/insdel.c
+++ b/src/insdel.c
@@ -3162,7 +3162,7 @@ bufbyte_string_displayed_columns (const Bufbyte *str, Bytecount len)
      {
  #ifdef MULE
        Emchar ch = charptr_emchar (str);
-      cols += XCHARSET_COLUMNS (CHAR_CHARSET (ch));
+      cols += CHAR_COLUMNS (ch);
  #else
        cols++;
  #endif
@@ -3180,7 +3180,7 @@ emchar_string_displayed_columns (const Emchar *str, Charcount len)
    int i;
  
    for (i = 0; i < len; i++)
-    cols += XCHARSET_COLUMNS (CHAR_CHARSET (str[i]));
+    cols += CHAR_COLUMNS (str[i]);
  
    return cols;
  #else  /* not MULE */
diff --git a/src/mule-charset.c b/src/mule-charset.c

index c0fe875..69278bf 100644 (file)
--- a/src/mule-charset.c
+++ b/src/mule-charset.c
@@ -135,18 +135,25 @@ const Bytecount rep_bytes_by_first_byte[0xA0] =
  #endif
  
  #ifdef UTF2000
-unsigned char
-get_byte_from_character_table (Emchar ch,
-                              Emchar* table, size_t size, unsigned char offset)
+/* Look up CH in the decoding table of the coded character set CCS.
+   Only one-dimensional charsets are searched.  Returns the index of
+   the matching table entry plus the charset's code offset, or -1 when
+   CCS is not one-dimensional or CH does not occur in the table.
+   The table pointer is not tested for NULL here; charset_get_byte1
+   performs that test before calling.  */
+int
+get_byte_from_character_table (Emchar ch, Lisp_Object ccs)
  {
-  size_t i;
+  Lisp_Charset* cs = XCHARSET(ccs);
  
-  for (i = 0; i < size; i++)
+  if (CHARSET_DIMENSION (cs) == 1)
      {
-      if (table[i] == ch)
-       return i + offset;
+      Emchar* table = CHARSET_DECODING_TABLE (cs);
+      size_t size = CHARSET_CHARS (cs);
+      unsigned char offset = CHARSET_CODE_OFFSET (cs);
+      size_t i;
+
+      for (i = 0; i < size; i++)
+       {
+         if (table[i] == ch)
+           return i + offset;
+       }
      }
-  return 0;
+  return -1;
  }
  
  #define CHAR96(ft,b)   (MIN_CHAR_96 + (ft - '0') * 96 + (b & 0x7f) - 32)
@@ -249,7 +256,6 @@ Emchar latin_jisx0201_to_ucs[94] =
    0x203E /* 0x7E       OVERLINE */
  };
  
-
  Emchar latin_iso8859_2_to_ucs[96] =
  {
    0x00A0 /* 0xA0       NO-BREAK SPACE */,
@@ -850,7 +856,6 @@ Emchar latin_viscii_upper_to_ucs[96] =
    CHAR96('2', 0x7f)
  };
  
-
  Emchar latin_tcvn5712_to_ucs[96] =
  {
    0x00A0 /* 0xA0  NO-BREAK SPACE */,
@@ -951,399 +956,6 @@ Emchar latin_tcvn5712_to_ucs[96] =
    0x1ED0 /* 0xFF  LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND ACUTE */
  };
  
-Charset_ID latin_a_char_to_charset[128] = {
-  /* U+0100 */ LEADING_BYTE_LATIN_ISO8859_4,
-  /* U+0101 */ LEADING_BYTE_LATIN_ISO8859_4,
-  /* U+0102 */ LEADING_BYTE_LATIN_ISO8859_2,
-  /* U+0103 */ LEADING_BYTE_LATIN_ISO8859_2,
-  /* U+0104 */ LEADING_BYTE_LATIN_ISO8859_2,
-  /* U+0105 */ LEADING_BYTE_LATIN_ISO8859_2,
-  /* U+0106 */ LEADING_BYTE_LATIN_ISO8859_2,
-  /* U+0107 */ LEADING_BYTE_LATIN_ISO8859_2,
-  /* U+0108 */ LEADING_BYTE_LATIN_ISO8859_3,
-  /* U+0109 */ LEADING_BYTE_LATIN_ISO8859_3,
-  /* U+010A */ LEADING_BYTE_LATIN_ISO8859_3,
-  /* U+010B */ LEADING_BYTE_LATIN_ISO8859_3,
-  /* U+010C */ LEADING_BYTE_LATIN_ISO8859_2,
-  /* U+010D */ LEADING_BYTE_LATIN_ISO8859_2,
-  /* U+010E */ LEADING_BYTE_LATIN_ISO8859_2,
-  /* U+010F */ LEADING_BYTE_LATIN_ISO8859_2,
-  /* U+0110 */ LEADING_BYTE_LATIN_ISO8859_2,
-  /* U+0111 */ LEADING_BYTE_LATIN_ISO8859_2,
-  /* U+0112 */ LEADING_BYTE_LATIN_ISO8859_4,
-  /* U+0113 */ LEADING_BYTE_LATIN_ISO8859_4,
-  /* U+0114 */ LEADING_BYTE_UCS_BMP,
-  /* U+0115 */ LEADING_BYTE_UCS_BMP,
-  /* U+0116 */ LEADING_BYTE_LATIN_ISO8859_4,
-  /* U+0117 */ LEADING_BYTE_LATIN_ISO8859_4,
-  /* U+0118 */ LEADING_BYTE_LATIN_ISO8859_2,
-  /* U+0119 */ LEADING_BYTE_LATIN_ISO8859_2,
-  /* U+011A */ LEADING_BYTE_LATIN_ISO8859_2,
-  /* U+011B */ LEADING_BYTE_LATIN_ISO8859_2,
-  /* U+011C */ LEADING_BYTE_LATIN_ISO8859_3,
-  /* U+011D */ LEADING_BYTE_LATIN_ISO8859_3,
-  /* U+011E */ LEADING_BYTE_LATIN_ISO8859_3,
-  /* U+011F */ LEADING_BYTE_LATIN_ISO8859_3,
-  /* U+0120 */ LEADING_BYTE_LATIN_ISO8859_3,
-  /* U+0121 */ LEADING_BYTE_LATIN_ISO8859_3,
-  /* U+0122 */ LEADING_BYTE_LATIN_ISO8859_4,
-  /* U+0123 */ LEADING_BYTE_LATIN_ISO8859_4,
-  /* U+0124 */ LEADING_BYTE_LATIN_ISO8859_3,
-  /* U+0125 */ LEADING_BYTE_LATIN_ISO8859_3,
-  /* U+0126 */ LEADING_BYTE_LATIN_ISO8859_3,
-  /* U+0127 */ LEADING_BYTE_LATIN_ISO8859_3,
-  /* U+0128 */ LEADING_BYTE_LATIN_ISO8859_4,
-  /* U+0129 */ LEADING_BYTE_LATIN_ISO8859_4,
-  /* U+012A */ LEADING_BYTE_LATIN_ISO8859_4,
-  /* U+012B */ LEADING_BYTE_LATIN_ISO8859_4,
-  /* U+012C */ LEADING_BYTE_UCS_BMP,
-  /* U+012D */ LEADING_BYTE_UCS_BMP,
-  /* U+012E */ LEADING_BYTE_LATIN_ISO8859_4,
-  /* U+012F */ LEADING_BYTE_LATIN_ISO8859_4,
-  /* U+0130 */ LEADING_BYTE_LATIN_ISO8859_3,
-  /* U+0131 */ LEADING_BYTE_LATIN_ISO8859_3,
-  /* U+0132 */ LEADING_BYTE_JAPANESE_JISX0212,
-  /* U+0133 */ LEADING_BYTE_JAPANESE_JISX0212,
-  /* U+0134 */ LEADING_BYTE_LATIN_ISO8859_3,
-  /* U+0135 */ LEADING_BYTE_LATIN_ISO8859_3,
-  /* U+0136 */ LEADING_BYTE_LATIN_ISO8859_4,
-  /* U+0137 */ LEADING_BYTE_LATIN_ISO8859_4,
-  /* U+0138 */ LEADING_BYTE_LATIN_ISO8859_4,
-  /* U+0139 */ LEADING_BYTE_LATIN_ISO8859_2,
-  /* U+013A */ LEADING_BYTE_LATIN_ISO8859_2,
-  /* U+013B */ LEADING_BYTE_LATIN_ISO8859_4,
-  /* U+013C */ LEADING_BYTE_LATIN_ISO8859_4,
-  /* U+013D */ LEADING_BYTE_LATIN_ISO8859_2,
-  /* U+013E */ LEADING_BYTE_LATIN_ISO8859_2,
-  /* U+013F */ LEADING_BYTE_JAPANESE_JISX0212,
-  /* U+0140 */ LEADING_BYTE_JAPANESE_JISX0212,
-  /* U+0141 */ LEADING_BYTE_LATIN_ISO8859_2,
-  /* U+0142 */ LEADING_BYTE_LATIN_ISO8859_2,
-  /* U+0143 */ LEADING_BYTE_LATIN_ISO8859_2,
-  /* U+0144 */ LEADING_BYTE_LATIN_ISO8859_2,
-  /* U+0145 */ LEADING_BYTE_LATIN_ISO8859_4,
-  /* U+0146 */ LEADING_BYTE_LATIN_ISO8859_4,
-  /* U+0147 */ LEADING_BYTE_LATIN_ISO8859_2,
-  /* U+0148 */ LEADING_BYTE_LATIN_ISO8859_2,
-  /* U+0149 */ LEADING_BYTE_JAPANESE_JISX0212,
-  /* U+014A */ LEADING_BYTE_LATIN_ISO8859_4,
-  /* U+014B */ LEADING_BYTE_LATIN_ISO8859_4,
-  /* U+014C */ LEADING_BYTE_LATIN_ISO8859_4,
-  /* U+014D */ LEADING_BYTE_LATIN_ISO8859_4,
-  /* U+014E */ LEADING_BYTE_UCS_BMP,
-  /* U+014F */ LEADING_BYTE_UCS_BMP,
-  /* U+0150 */ LEADING_BYTE_LATIN_ISO8859_2,
-  /* U+0151 */ LEADING_BYTE_LATIN_ISO8859_2,
-  /* U+0152 */ LEADING_BYTE_JAPANESE_JISX0212,
-  /* U+0153 */ LEADING_BYTE_JAPANESE_JISX0212,
-  /* U+0154 */ LEADING_BYTE_LATIN_ISO8859_2,
-  /* U+0155 */ LEADING_BYTE_LATIN_ISO8859_2,
-  /* U+0156 */ LEADING_BYTE_LATIN_ISO8859_4,
-  /* U+0157 */ LEADING_BYTE_LATIN_ISO8859_4,
-  /* U+0158 */ LEADING_BYTE_LATIN_ISO8859_2,
-  /* U+0159 */ LEADING_BYTE_LATIN_ISO8859_2,
-  /* U+015A */ LEADING_BYTE_LATIN_ISO8859_2,
-  /* U+015B */ LEADING_BYTE_LATIN_ISO8859_2,
-  /* U+015C */ LEADING_BYTE_LATIN_ISO8859_3,
-  /* U+015D */ LEADING_BYTE_LATIN_ISO8859_3,
-  /* U+015E */ LEADING_BYTE_LATIN_ISO8859_2,
-  /* U+015F */ LEADING_BYTE_LATIN_ISO8859_2,
-  /* U+0160 */ LEADING_BYTE_LATIN_ISO8859_2,
-  /* U+0161 */ LEADING_BYTE_LATIN_ISO8859_2,
-  /* U+0162 */ LEADING_BYTE_LATIN_ISO8859_2,
-  /* U+0163 */ LEADING_BYTE_LATIN_ISO8859_2,
-  /* U+0164 */ LEADING_BYTE_LATIN_ISO8859_2,
-  /* U+0165 */ LEADING_BYTE_LATIN_ISO8859_2,
-  /* U+0166 */ LEADING_BYTE_LATIN_ISO8859_4,
-  /* U+0167 */ LEADING_BYTE_LATIN_ISO8859_4,
-  /* U+0168 */ LEADING_BYTE_LATIN_ISO8859_4,
-  /* U+0169 */ LEADING_BYTE_LATIN_ISO8859_4,
-  /* U+016A */ LEADING_BYTE_LATIN_ISO8859_4,
-  /* U+016B */ LEADING_BYTE_LATIN_ISO8859_4,
-  /* U+016C */ LEADING_BYTE_LATIN_ISO8859_3,
-  /* U+016D */ LEADING_BYTE_LATIN_ISO8859_3,
-  /* U+016E */ LEADING_BYTE_LATIN_ISO8859_2,
-  /* U+016F */ LEADING_BYTE_LATIN_ISO8859_2,
-  /* U+0170 */ LEADING_BYTE_LATIN_ISO8859_2,
-  /* U+0171 */ LEADING_BYTE_LATIN_ISO8859_2,
-  /* U+0172 */ LEADING_BYTE_LATIN_ISO8859_4,
-  /* U+0173 */ LEADING_BYTE_LATIN_ISO8859_4,
-  /* U+0174 */ LEADING_BYTE_JAPANESE_JISX0212,
-  /* U+0175 */ LEADING_BYTE_JAPANESE_JISX0212,
-  /* U+0176 */ LEADING_BYTE_JAPANESE_JISX0212,
-  /* U+0177 */ LEADING_BYTE_JAPANESE_JISX0212,
-  /* U+0178 */ LEADING_BYTE_JAPANESE_JISX0212,
-  /* U+0179 */ LEADING_BYTE_LATIN_ISO8859_2,
-  /* U+017A */ LEADING_BYTE_LATIN_ISO8859_2,
-  /* U+017B */ LEADING_BYTE_LATIN_ISO8859_2,
-  /* U+017C */ LEADING_BYTE_LATIN_ISO8859_2,
-  /* U+017D */ LEADING_BYTE_LATIN_ISO8859_2,
-  /* U+017E */ LEADING_BYTE_LATIN_ISO8859_2,
-  /* U+017F */ LEADING_BYTE_UCS_BMP
-};
-
-unsigned char latin_a_char_to_byte1[128] = {
-  /* U+0100 */ 0xC0 - 0x80,
-  /* U+0101 */ 0xE0 - 0x80,
-  /* U+0102 */ 0xC3 - 0x80,
-  /* U+0103 */ 0xE3 - 0x80,
-  /* U+0104 */ 0xA1 - 0x80,
-  /* U+0105 */ 0xB1 - 0x80,
-  /* U+0106 */ 0xC6 - 0x80,
-  /* U+0107 */ 0xE6 - 0x80,
-  /* U+0108 */ 0xC6 - 0x80,
-  /* U+0109 */ 0xE6 - 0x80,
-  /* U+010A */ 0xC5 - 0x80,
-  /* U+010B */ 0xE5 - 0x80,
-  /* U+010C */ 0xC8 - 0x80,
-  /* U+010D */ 0xE8 - 0x80,
-  /* U+010E */ 0xCF - 0x80,
-  /* U+010F */ 0xEF - 0x80,
-  /* U+0110 */ 0xD0 - 0x80,
-  /* U+0111 */ 0xF0 - 0x80,
-  /* U+0112 */ 0xAA - 0x80,
-  /* U+0113 */ 0xBA - 0x80,
-  /* U+0114 */ 0x01,
-  /* U+0115 */ 0x01,
-  /* U+0116 */ 0xCC - 0x80,
-  /* U+0117 */ 0xEC - 0x80,
-  /* U+0118 */ 0xCA - 0x80,
-  /* U+0119 */ 0xEA - 0x80,
-  /* U+011A */ 0xCC - 0x80,
-  /* U+011B */ 0xEC - 0x80,
-  /* U+011C */ 0xD8 - 0x80,
-  /* U+011D */ 0xF8 - 0x80,
-  /* U+011E */ 0xAB - 0x80,
-  /* U+011F */ 0xBB - 0x80,
-  /* U+0120 */ 0xD5 - 0x80,
-  /* U+0121 */ 0xF5 - 0x80,
-  /* U+0122 */ 0xAB - 0x80,
-  /* U+0123 */ 0xBB - 0x80,
-  /* U+0124 */ 0xA6 - 0x80,
-  /* U+0125 */ 0xB6 - 0x80,
-  /* U+0126 */ 0xA1 - 0x80,
-  /* U+0127 */ 0xB1 - 0x80,
-  /* U+0128 */ 0xA5 - 0x80,
-  /* U+0129 */ 0xB5 - 0x80,
-  /* U+012A */ 0xCF - 0x80,
-  /* U+012B */ 0xEF - 0x80,
-  /* U+012C */ 0x01,
-  /* U+012D */ 0x01,
-  /* U+012E */ 0xC7 - 0x80,
-  /* U+012F */ 0xE7 - 0x80,
-  /* U+0130 */ 0xA9 - 0x80,
-  /* U+0131 */ 0xB9 - 0x80,
-  /* U+0132 */ 0x29,
-  /* U+0133 */ 0x29,
-  /* U+0134 */ 0xAC - 0x80,
-  /* U+0135 */ 0xBC - 0x80,
-  /* U+0136 */ 0xD3 - 0x80,
-  /* U+0137 */ 0xF3 - 0x80,
-  /* U+0138 */ 0xA2 - 0x80,
-  /* U+0139 */ 0xC5 - 0x80,
-  /* U+013A */ 0xE5 - 0x80,
-  /* U+013B */ 0xA6 - 0x80,
-  /* U+013C */ 0xB6 - 0x80,
-  /* U+013D */ 0xA5 - 0x80,
-  /* U+013E */ 0xB5 - 0x80,
-  /* U+013F */ 0x29,
-  /* U+0140 */ 0x29,
-  /* U+0141 */ 0xA3 - 0x80,
-  /* U+0142 */ 0xB3 - 0x80,
-  /* U+0143 */ 0xD1 - 0x80,
-  /* U+0144 */ 0xF1 - 0x80,
-  /* U+0145 */ 0xD1 - 0x80,
-  /* U+0146 */ 0xF1 - 0x80,
-  /* U+0147 */ 0xD2 - 0x80,
-  /* U+0148 */ 0xF2 - 0x80,
-  /* U+0149 */ 0x29,
-  /* U+014A */ 0xBD - 0x80,
-  /* U+014B */ 0xBF - 0x80,
-  /* U+014C */ 0xD2 - 0x80,
-  /* U+014D */ 0xF2 - 0x80,
-  /* U+014E */ 0x01,
-  /* U+014F */ 0x01,
-  /* U+0150 */ 0xD5 - 0x80,
-  /* U+0151 */ 0xF5 - 0x80,
-  /* U+0152 */ 0x29,
-  /* U+0153 */ 0x29,
-  /* U+0154 */ 0xC0 - 0x80,
-  /* U+0155 */ 0xE0 - 0x80,
-  /* U+0156 */ 0xA3 - 0x80,
-  /* U+0157 */ 0xB3 - 0x80,
-  /* U+0158 */ 0xD8 - 0x80,
-  /* U+0159 */ 0xF8 - 0x80,
-  /* U+015A */ 0xA6 - 0x80,
-  /* U+015B */ 0xB6 - 0x80,
-  /* U+015C */ 0xDE - 0x80,
-  /* U+015D */ 0xFE - 0x80,
-  /* U+015E */ 0xAA - 0x80,
-  /* U+015F */ 0xBA - 0x80,
-  /* U+0160 */ 0xA9 - 0x80,
-  /* U+0161 */ 0xB9 - 0x80,
-  /* U+0162 */ 0xDE - 0x80,
-  /* U+0163 */ 0xFE - 0x80,
-  /* U+0164 */ 0xAB - 0x80,
-  /* U+0165 */ 0xBB - 0x80,
-  /* U+0166 */ 0xAC - 0x80,
-  /* U+0167 */ 0xBC - 0x80,
-  /* U+0168 */ 0xDD - 0x80,
-  /* U+0169 */ 0xFD - 0x80,
-  /* U+016A */ 0xDE - 0x80,
-  /* U+016B */ 0xFE - 0x80,
-  /* U+016C */ 0xDD - 0x80,
-  /* U+016D */ 0xFD - 0x80,
-  /* U+016E */ 0xD9 - 0x80,
-  /* U+016F */ 0xF9 - 0x80,
-  /* U+0170 */ 0xDB - 0x80,
-  /* U+0171 */ 0xFB - 0x80,
-  /* U+0172 */ 0xD9 - 0x80,
-  /* U+0173 */ 0xF9 - 0x80,
-  /* U+0174 */ 0x2A,
-  /* U+0175 */ 0x2B,
-  /* U+0176 */ 0x2A,
-  /* U+0177 */ 0x2B,
-  /* U+0178 */ 0x2A,
-  /* U+0179 */ 0xAC - 0x80,
-  /* U+017A */ 0xBC - 0x80,
-  /* U+017B */ 0xAF - 0x80,
-  /* U+017C */ 0xBF - 0x80,
-  /* U+017D */ 0xAE - 0x80,
-  /* U+017E */ 0xBE - 0x80,
-  /* U+017F */ 0x01
-};
-
-unsigned char latin_a_char_to_byte2[128] = {
-  /* U+0100 */ 0x00,
-  /* U+0101 */ 0x00,
-  /* U+0102 */ 0x00,
-  /* U+0103 */ 0x00,
-  /* U+0104 */ 0x00,
-  /* U+0105 */ 0x00,
-  /* U+0106 */ 0x00,
-  /* U+0107 */ 0x00,
-  /* U+0108 */ 0x00,
-  /* U+0109 */ 0x00,
-  /* U+010A */ 0x00,
-  /* U+010B */ 0x00,
-  /* U+010C */ 0x00,
-  /* U+010D */ 0x00,
-  /* U+010E */ 0x00,
-  /* U+010F */ 0x00,
-  /* U+0110 */ 0x00,
-  /* U+0111 */ 0x00,
-  /* U+0112 */ 0x00,
-  /* U+0113 */ 0x00,
-  /* U+0114 */ 0x14,
-  /* U+0115 */ 0x15,
-  /* U+0116 */ 0x00,
-  /* U+0117 */ 0x00,
-  /* U+0118 */ 0x00,
-  /* U+0119 */ 0x00,
-  /* U+011A */ 0x00,
-  /* U+011B */ 0x00,
-  /* U+011C */ 0x00,
-  /* U+011D */ 0x00,
-  /* U+011E */ 0x00,
-  /* U+011F */ 0x00,
-  /* U+0120 */ 0x00,
-  /* U+0121 */ 0x00,
-  /* U+0122 */ 0x00,
-  /* U+0123 */ 0x00,
-  /* U+0124 */ 0x00,
-  /* U+0125 */ 0x00,
-  /* U+0126 */ 0x00,
-  /* U+0127 */ 0x00,
-  /* U+0128 */ 0x00,
-  /* U+0129 */ 0x00,
-  /* U+012A */ 0x00,
-  /* U+012B */ 0x00,
-  /* U+012C */ 0x2C,
-  /* U+012D */ 0x2D,
-  /* U+012E */ 0x00,
-  /* U+012F */ 0x00,
-  /* U+0130 */ 0x00,
-  /* U+0131 */ 0x00,
-  /* U+0132 */ 0x26,
-  /* U+0133 */ 0x46,
-  /* U+0134 */ 0x00,
-  /* U+0135 */ 0x00,
-  /* U+0136 */ 0x00,
-  /* U+0137 */ 0x00,
-  /* U+0138 */ 0x00,
-  /* U+0139 */ 0x00,
-  /* U+013A */ 0x00,
-  /* U+013B */ 0x00,
-  /* U+013C */ 0x00,
-  /* U+013D */ 0x00,
-  /* U+013E */ 0x00,
-  /* U+013F */ 0x29,
-  /* U+0140 */ 0x49,
-  /* U+0141 */ 0x00,
-  /* U+0142 */ 0x00,
-  /* U+0143 */ 0x00,
-  /* U+0144 */ 0x00,
-  /* U+0145 */ 0x00,
-  /* U+0146 */ 0x00,
-  /* U+0147 */ 0x00,
-  /* U+0148 */ 0x00,
-  /* U+0149 */ 0x4A,
-  /* U+014A */ 0x00,
-  /* U+014B */ 0x00,
-  /* U+014C */ 0x00,
-  /* U+014D */ 0x00,
-  /* U+014E */ 0x4E,
-  /* U+014F */ 0x4F,
-  /* U+0150 */ 0x00,
-  /* U+0151 */ 0x00,
-  /* U+0152 */ 0x2D,
-  /* U+0153 */ 0x4D,
-  /* U+0154 */ 0x00,
-  /* U+0155 */ 0x00,
-  /* U+0156 */ 0x00,
-  /* U+0157 */ 0x00,
-  /* U+0158 */ 0x00,
-  /* U+0159 */ 0x00,
-  /* U+015A */ 0x00,
-  /* U+015B */ 0x00,
-  /* U+015C */ 0x00,
-  /* U+015D */ 0x00,
-  /* U+015E */ 0x00,
-  /* U+015F */ 0x00,
-  /* U+0160 */ 0x00,
-  /* U+0161 */ 0x00,
-  /* U+0162 */ 0x00,
-  /* U+0163 */ 0x00,
-  /* U+0164 */ 0x00,
-  /* U+0165 */ 0x00,
-  /* U+0166 */ 0x00,
-  /* U+0167 */ 0x00,
-  /* U+0168 */ 0x00,
-  /* U+0169 */ 0x00,
-  /* U+016A */ 0x00,
-  /* U+016B */ 0x00,
-  /* U+016C */ 0x00,
-  /* U+016D */ 0x00,
-  /* U+016E */ 0x00,
-  /* U+016F */ 0x00,
-  /* U+0170 */ 0x00,
-  /* U+0171 */ 0x00,
-  /* U+0172 */ 0x00,
-  /* U+0173 */ 0x00,
-  /* U+0174 */ 0x71,
-  /* U+0175 */ 0x71,
-  /* U+0176 */ 0x74,
-  /* U+0177 */ 0x74,
-  /* U+0178 */ 0x73,
-  /* U+0179 */ 0x00,
-  /* U+017A */ 0x00,
-  /* U+017B */ 0x00,
-  /* U+017C */ 0x00,
-  /* U+017D */ 0x00,
-  /* U+017E */ 0x00,
-  /* U+017F */ 0x7F
-};
-
  Lisp_Object Vutf_2000_version;
  #endif
  
@@ -1781,17 +1393,20 @@ static const struct lrecord_description charset_description[] = {
  };
  
  DEFINE_LRECORD_IMPLEMENTATION ("charset", charset,
-                               mark_charset, print_charset, 0, 0, 0, charset_description,
+                               mark_charset, print_charset, 0, 0, 0,
+                              charset_description,
                                Lisp_Charset);
  
  /* Make a new charset. */
  /* #### SJT Should generic properties be allowed? */
  static Lisp_Object
-make_charset (Charset_ID id, Lisp_Object name, unsigned char rep_bytes,
+make_charset (Charset_ID id, Lisp_Object name,
               unsigned char type, unsigned char columns, unsigned char graphic,
-             Bufbyte final, unsigned char direction,  Lisp_Object short_name,
+             Bufbyte final, unsigned char direction, Lisp_Object short_name,
               Lisp_Object long_name, Lisp_Object doc,
-             Lisp_Object reg)
+             Lisp_Object reg,
+             Emchar* decoding_table,
+             Emchar ucs_min, Emchar ucs_max, Emchar code_offset)
  {
    Lisp_Object obj;
    Lisp_Charset *cs = alloc_lcrecord_type (Lisp_Charset, &lrecord_charset);
@@ -1804,7 +1419,6 @@ make_charset (Charset_ID id, Lisp_Object name, unsigned char rep_bytes,
    CHARSET_NAME         (cs) = name;
    CHARSET_SHORT_NAME   (cs) = short_name;
    CHARSET_LONG_NAME    (cs) = long_name;
-  CHARSET_REP_BYTES    (cs) = rep_bytes;
    CHARSET_DIRECTION    (cs) = direction;
    CHARSET_TYPE         (cs) = type;
    CHARSET_COLUMNS      (cs) = columns;
@@ -1814,7 +1428,13 @@ make_charset (Charset_ID id, Lisp_Object name, unsigned char rep_bytes,
    CHARSET_REGISTRY     (cs) = reg;
    CHARSET_CCL_PROGRAM  (cs) = Qnil;
    CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil;
-
+#ifdef UTF2000
+  CHARSET_DECODING_TABLE(cs) = decoding_table;
+  CHARSET_UCS_MIN(cs) = ucs_min;
+  CHARSET_UCS_MAX(cs) = ucs_max;
+  CHARSET_CODE_OFFSET(cs) = code_offset;
+#endif
+  
    switch ( CHARSET_TYPE (cs) )
      {
      case CHARSET_TYPE_94:
@@ -1845,6 +1465,15 @@ make_charset (Charset_ID id, Lisp_Object name, unsigned char rep_bytes,
  #endif
      }
  
+#ifndef UTF2000
+  if (id == LEADING_BYTE_ASCII)
+    CHARSET_REP_BYTES (cs) = 1;
+  else if (id < 0xA0)
+    CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 1;
+  else
+    CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 2;
+#endif
+  
    if (final)
      {
        /* some charsets do not have final characters.  This includes
@@ -1861,6 +1490,11 @@ make_charset (Charset_ID id, Lisp_Object name, unsigned char rep_bytes,
  
    assert (NILP (chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE]));
    chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE] = obj;
+#ifndef UTF2000
+  if (id < 0xA0)
+    /* official leading byte */
+    rep_bytes_by_first_byte[id] = CHARSET_REP_BYTES (cs);
+#endif
  
    /* Some charsets are "faux" and don't have names or really exist at
       all except in the leading-byte table. */
@@ -1897,6 +1531,87 @@ get_unallocated_leading_byte (int dimension)
    return lb;
  }
  
+#ifdef UTF2000
+/* Return the first position byte of character CH in CHARSET, or 0 when
+   CH cannot be expressed in CHARSET.
+
+   The lookup is tried in this order:
+     1. the charset's decoding table (one-dimensional charsets only);
+     2. the charset's UCS range [ucs_min, ucs_max], mapped linearly
+        onto the code offset;
+     3. the algorithmic 94 / 96 / 94x94 / 96x96 areas selected by the
+        charset's final byte.
+
+   Position bytes of 94-sets start at 33 (0x21) and those of 96-sets
+   at 32 (0x20), matching the CHAR96 macro and charset_get_byte2.  */
+unsigned char
+charset_get_byte1 (Lisp_Object charset, Emchar ch)
+{
+  Emchar* table = XCHARSET_DECODING_TABLE (charset);
+  int d;
+
+  if ( (table != NULL) &&
+       (XCHARSET_DIMENSION (charset) == 1) &&
+       ( (d = get_byte_from_character_table (ch, charset)) >= 0) )
+    return d;
+  else if ((CHARSET_UCS_MIN (XCHARSET (charset)) <= ch)
+          && (ch <= CHARSET_UCS_MAX (XCHARSET (charset))))
+    /* NOTE(review): the result is converted to unsigned char, so for a
+       charset whose UCS range is wider than one byte (ucs-bmp is
+       registered with 0..0xFFFF) only the low-order bits survive,
+       while charset_get_byte2 returns (ch >> 8) & 0xff for ucs-bmp --
+       confirm that this byte order is what callers expect.  */
+    return ch - CHARSET_UCS_MIN (XCHARSET (charset))
+      +  CHARSET_CODE_OFFSET (XCHARSET (charset));
+  else if (XCHARSET_DIMENSION (charset) == 1)
+    {
+      if (XCHARSET_CHARS (charset) == 94)
+       {
+         if (((d = ch - (MIN_CHAR_94
+                         + (XCHARSET_FINAL (charset) - '0') * 94)) >= 0)
+             && (d < 94))
+           /* Was `d + 32': a 94-set has no position 32.  */
+           return d + 33;
+       }
+      else if (XCHARSET_CHARS (charset) == 96)
+       {
+         if (((d = ch - (MIN_CHAR_96
+                         + (XCHARSET_FINAL (charset) - '0') * 96)) >= 0)
+             && (d < 96))
+           /* Was `d + 33', which could yield 128 for d == 95.  */
+           return d + 32;
+       }
+      else
+       return 0;
+    }
+  else if (XCHARSET_DIMENSION (charset) == 2)
+    {
+      if (XCHARSET_CHARS (charset) == 94)
+       {
+         if (((d = ch - (MIN_CHAR_94x94
+                         + (XCHARSET_FINAL (charset) - '0') * 94 * 94)) >= 0)
+             && (d < 94 * 94))
+           return (d / 94) + 33;
+       }
+      else if (XCHARSET_CHARS (charset) == 96)
+       {
+         if (((d = ch - (MIN_CHAR_96x96
+                         + (XCHARSET_FINAL (charset) - '0') * 96 * 96)) >= 0)
+             && (d < 96 * 96))
+           return (d / 96) + 32;
+       }
+    }
+  return 0;
+}
+
+/* Return the second position byte of character CH in CHARSET.
+   The result is 0 for a one-dimensional charset, and 0 when CH lies
+   outside the code area selected by CHARSET's final byte.  For
+   ucs-bmp the result is bits 8..15 of CH.  */
+unsigned char
+charset_get_byte2 (Lisp_Object charset, Emchar ch)
+{
+  if (XCHARSET_DIMENSION (charset) == 1)
+    return 0;
+
+  if (EQ (charset, Vcharset_ucs_bmp))
+    return (ch >> 8) & 0xff;
+
+  if (XCHARSET_CHARS (charset) == 94)
+    {
+      /* 94x94 area: positions start at 33.  */
+      if ((MIN_CHAR_94x94
+           + (XCHARSET_FINAL (charset) - '0') * 94 * 94 <= ch)
+          && (ch < MIN_CHAR_94x94
+              + (XCHARSET_FINAL (charset) - '0' + 1) * 94 * 94))
+        return ((ch - MIN_CHAR_94x94) % 94) + 33;
+      return 0;
+    }
+
+  /* Any other character count is handled as a 96x96 area, whose
+     positions start at 32.  */
+  if ((MIN_CHAR_96x96
+       + (XCHARSET_FINAL (charset) - '0') * 96 * 96 <= ch)
+      && (ch < MIN_CHAR_96x96
+          + (XCHARSET_FINAL (charset) - '0' + 1) * 96 * 96))
+    return ((ch - MIN_CHAR_96x96) % 96) + 32;
+  return 0;
+}
+
+Lisp_Object Vdefault_coded_charset_priority_list;
+#endif
+
  \f
  /************************************************************************/
  /*                      Basic charset Lisp functions                    */
@@ -2208,8 +1923,11 @@ character set.  Recognized properties are:
  
    if (columns == -1)
      columns = dimension;
-  charset = make_charset (id, name, dimension + 2, type, columns, graphic,
-                         final, direction, short_name, long_name, doc_string, registry);
+  charset = make_charset (id, name, type, columns, graphic,
+                         final, direction, short_name, long_name,
+                         doc_string, registry,
+                         NULL,
+                         0, 0, 0);
    if (!NILP (ccl_program))
      XCHARSET_CCL_PROGRAM (charset) = ccl_program;
    return charset;
@@ -2254,9 +1972,11 @@ NEW-NAME is the name of the new charset.  Return the new charset.
    long_name = CHARSET_LONG_NAME (cs);
    registry = CHARSET_REGISTRY (cs);
  
-  new_charset = make_charset (id, new_name, dimension + 2, type, columns,
+  new_charset = make_charset (id, new_name, type, columns,
                               graphic, final, direction, short_name, long_name,
-                             doc_string, registry);
+                             doc_string, registry,
+                             NULL,
+                             0, 0, 0);
  
    CHARSET_REVERSE_DIRECTION_CHARSET (cs) = new_charset;
    XCHARSET_REVERSE_DIRECTION_CHARSET (new_charset) = charset;
@@ -2789,10 +2509,16 @@ Leading-code of private TYPE9N charset of column-width 1.
  #endif
  
  #ifdef UTF2000
-  Vutf_2000_version = build_string("0.6 (Tōbushijō-mae)");
+  Vutf_2000_version = build_string("0.7 (Hirano)");
    DEFVAR_LISP ("utf-2000-version", &Vutf_2000_version /*
  Version number of UTF-2000.
  */ );
+
+  Vdefault_coded_charset_priority_list = Qnil;
+  DEFVAR_LISP ("default-coded-charset-priority-list",
+              &Vdefault_coded_charset_priority_list /*
+Default order of preferred coded-character-set.
+*/ );
  #endif
  }
  
@@ -2809,296 +2535,331 @@ complex_vars_of_mule_charset (void)
  #ifdef UTF2000
    staticpro (&Vcharset_ucs_bmp);
    Vcharset_ucs_bmp =
-    make_charset (LEADING_BYTE_UCS_BMP, Qucs_bmp, 1,
+    make_charset (LEADING_BYTE_UCS_BMP, Qucs_bmp,
                   CHARSET_TYPE_256X256, 1, 0, 0,
                   CHARSET_LEFT_TO_RIGHT,
                   build_string ("BMP"),
                   build_string ("BMP"),
                   build_string ("BMP"),
-                 build_string (""));
+                 build_string (""),
+                 NULL, 0, 0xFFFF, 0);
  #endif
    staticpro (&Vcharset_ascii);
    Vcharset_ascii =
-    make_charset (LEADING_BYTE_ASCII, Qascii, 1,
+    make_charset (LEADING_BYTE_ASCII, Qascii,
                   CHARSET_TYPE_94, 1, 0, 'B',
                   CHARSET_LEFT_TO_RIGHT,
                   build_string ("ASCII"),
                   build_string ("ASCII)"),
                   build_string ("ASCII (ISO646 IRV)"),
-                 build_string ("\\(iso8859-[0-9]*\\|-ascii\\)"));
+                 build_string ("\\(iso8859-[0-9]*\\|-ascii\\)"),
+                 NULL, 0, 0x7F, 0);
    staticpro (&Vcharset_control_1);
    Vcharset_control_1 =
-    make_charset (LEADING_BYTE_CONTROL_1, Qcontrol_1, 2,
+    make_charset (LEADING_BYTE_CONTROL_1, Qcontrol_1,
                   CHARSET_TYPE_94, 1, 1, 0,
                   CHARSET_LEFT_TO_RIGHT,
                   build_string ("C1"),
                   build_string ("Control characters"),
                   build_string ("Control characters 128-191"),
-                 build_string (""));
+                 build_string (""),
+                 NULL, 0x80, 0x9F, 0);
    staticpro (&Vcharset_latin_iso8859_1);
    Vcharset_latin_iso8859_1 =
-    make_charset (LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1, 2,
+    make_charset (LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1,
                   CHARSET_TYPE_96, 1, 1, 'A',
                   CHARSET_LEFT_TO_RIGHT,
                   build_string ("Latin-1"),
                   build_string ("ISO8859-1 (Latin-1)"),
                   build_string ("ISO8859-1 (Latin-1)"),
-                 build_string ("iso8859-1"));
+                 build_string ("iso8859-1"),
+                 NULL, 0xA0, 0xFF, 32);
    staticpro (&Vcharset_latin_iso8859_2);
    Vcharset_latin_iso8859_2 =
-    make_charset (LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2, 2,
+    make_charset (LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2,
                   CHARSET_TYPE_96, 1, 1, 'B',
                   CHARSET_LEFT_TO_RIGHT,
                   build_string ("Latin-2"),
                   build_string ("ISO8859-2 (Latin-2)"),
                   build_string ("ISO8859-2 (Latin-2)"),
-                 build_string ("iso8859-2"));
+                 build_string ("iso8859-2"),
+                 latin_iso8859_2_to_ucs, 0, 0, 32);
    staticpro (&Vcharset_latin_iso8859_3);
    Vcharset_latin_iso8859_3 =
-    make_charset (LEADING_BYTE_LATIN_ISO8859_3, Qlatin_iso8859_3, 2,
+    make_charset (LEADING_BYTE_LATIN_ISO8859_3, Qlatin_iso8859_3,
                   CHARSET_TYPE_96, 1, 1, 'C',
                   CHARSET_LEFT_TO_RIGHT,
                   build_string ("Latin-3"),
                   build_string ("ISO8859-3 (Latin-3)"),
                   build_string ("ISO8859-3 (Latin-3)"),
-                 build_string ("iso8859-3"));
+                 build_string ("iso8859-3"),
+                 latin_iso8859_3_to_ucs, 0, 0, 32);
    staticpro (&Vcharset_latin_iso8859_4);
    Vcharset_latin_iso8859_4 =
-    make_charset (LEADING_BYTE_LATIN_ISO8859_4, Qlatin_iso8859_4, 2,
+    make_charset (LEADING_BYTE_LATIN_ISO8859_4, Qlatin_iso8859_4,
                   CHARSET_TYPE_96, 1, 1, 'D',
                   CHARSET_LEFT_TO_RIGHT,
                   build_string ("Latin-4"),
                   build_string ("ISO8859-4 (Latin-4)"),
                   build_string ("ISO8859-4 (Latin-4)"),
-                 build_string ("iso8859-4"));
+                 build_string ("iso8859-4"),
+                 latin_iso8859_4_to_ucs, 0, 0, 32);
    staticpro (&Vcharset_thai_tis620);
    Vcharset_thai_tis620 =
-    make_charset (LEADING_BYTE_THAI_TIS620, Qthai_tis620, 2,
+    make_charset (LEADING_BYTE_THAI_TIS620, Qthai_tis620,
                   CHARSET_TYPE_96, 1, 1, 'T',
                   CHARSET_LEFT_TO_RIGHT,
                   build_string ("TIS620"),
                   build_string ("TIS620 (Thai)"),
                   build_string ("TIS620.2529 (Thai)"),
-                 build_string ("tis620"));
+                 build_string ("tis620"),
+                 NULL, MIN_CHAR_THAI, MAX_CHAR_THAI, 32);
    staticpro (&Vcharset_greek_iso8859_7);
    Vcharset_greek_iso8859_7 =
-    make_charset (LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7, 2,
+    make_charset (LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7,
                   CHARSET_TYPE_96, 1, 1, 'F',
                   CHARSET_LEFT_TO_RIGHT,
                   build_string ("ISO8859-7"),
                   build_string ("ISO8859-7 (Greek)"),
                   build_string ("ISO8859-7 (Greek)"),
-                 build_string ("iso8859-7"));
+                 build_string ("iso8859-7"),
+                 NULL, MIN_CHAR_GREEK, MAX_CHAR_GREEK, 32);
    staticpro (&Vcharset_arabic_iso8859_6);
    Vcharset_arabic_iso8859_6 =
-    make_charset (LEADING_BYTE_ARABIC_ISO8859_6, Qarabic_iso8859_6, 2,
+    make_charset (LEADING_BYTE_ARABIC_ISO8859_6, Qarabic_iso8859_6,
                   CHARSET_TYPE_96, 1, 1, 'G',
                   CHARSET_RIGHT_TO_LEFT,
                   build_string ("ISO8859-6"),
                   build_string ("ISO8859-6 (Arabic)"),
                   build_string ("ISO8859-6 (Arabic)"),
-                 build_string ("iso8859-6"));
+                 build_string ("iso8859-6"),
+                 NULL, 0, 0, 32);
    staticpro (&Vcharset_hebrew_iso8859_8);
    Vcharset_hebrew_iso8859_8 =
-    make_charset (LEADING_BYTE_HEBREW_ISO8859_8, Qhebrew_iso8859_8, 2,
+    make_charset (LEADING_BYTE_HEBREW_ISO8859_8, Qhebrew_iso8859_8,
                   CHARSET_TYPE_96, 1, 1, 'H',
                   CHARSET_RIGHT_TO_LEFT,
                   build_string ("ISO8859-8"),
                   build_string ("ISO8859-8 (Hebrew)"),
                   build_string ("ISO8859-8 (Hebrew)"),
-                 build_string ("iso8859-8"));
+                 build_string ("iso8859-8"),
+                 NULL, MIN_CHAR_HEBREW, MAX_CHAR_HEBREW, 32);
    staticpro (&Vcharset_katakana_jisx0201);
    Vcharset_katakana_jisx0201 =
-    make_charset (LEADING_BYTE_KATAKANA_JISX0201, Qkatakana_jisx0201, 2,
+    make_charset (LEADING_BYTE_KATAKANA_JISX0201, Qkatakana_jisx0201,
                   CHARSET_TYPE_94, 1, 1, 'I',
                   CHARSET_LEFT_TO_RIGHT,
                   build_string ("JISX0201 Kana"),
                   build_string ("JISX0201.1976 (Japanese Kana)"),
                   build_string ("JISX0201.1976 Japanese Kana"),
-                 build_string ("jisx0201.1976"));
+                 build_string ("jisx0201.1976"),
+                 NULL,
+                 MIN_CHAR_HALFWIDTH_KATAKANA,
+                 MAX_CHAR_HALFWIDTH_KATAKANA, 33);
    staticpro (&Vcharset_latin_jisx0201);
    Vcharset_latin_jisx0201 =
-    make_charset (LEADING_BYTE_LATIN_JISX0201, Qlatin_jisx0201, 2,
+    make_charset (LEADING_BYTE_LATIN_JISX0201, Qlatin_jisx0201,
                   CHARSET_TYPE_94, 1, 0, 'J',
                   CHARSET_LEFT_TO_RIGHT,
                   build_string ("JISX0201 Roman"),
                   build_string ("JISX0201.1976 (Japanese Roman)"),
                   build_string ("JISX0201.1976 Japanese Roman"),
-                 build_string ("jisx0201.1976"));
+                 build_string ("jisx0201.1976"),
+                 latin_jisx0201_to_ucs, 0, 0, 33);
    staticpro (&Vcharset_cyrillic_iso8859_5);
    Vcharset_cyrillic_iso8859_5 =
-    make_charset (LEADING_BYTE_CYRILLIC_ISO8859_5, Qcyrillic_iso8859_5, 2,
+    make_charset (LEADING_BYTE_CYRILLIC_ISO8859_5, Qcyrillic_iso8859_5,
                   CHARSET_TYPE_96, 1, 1, 'L',
                   CHARSET_LEFT_TO_RIGHT,
                   build_string ("ISO8859-5"),
                   build_string ("ISO8859-5 (Cyrillic)"),
                   build_string ("ISO8859-5 (Cyrillic)"),
-                 build_string ("iso8859-5"));
+                 build_string ("iso8859-5"),
+                 NULL, MIN_CHAR_CYRILLIC, MAX_CHAR_CYRILLIC, 32);
    staticpro (&Vcharset_latin_iso8859_9);
    Vcharset_latin_iso8859_9 =
-    make_charset (LEADING_BYTE_LATIN_ISO8859_9, Qlatin_iso8859_9, 2,
+    make_charset (LEADING_BYTE_LATIN_ISO8859_9, Qlatin_iso8859_9,
                   CHARSET_TYPE_96, 1, 1, 'M',
                   CHARSET_LEFT_TO_RIGHT,
                   build_string ("Latin-5"),
                   build_string ("ISO8859-9 (Latin-5)"),
                   build_string ("ISO8859-9 (Latin-5)"),
-                 build_string ("iso8859-9"));
+                 build_string ("iso8859-9"),
+                 latin_iso8859_9_to_ucs, 0, 0, 32);
    staticpro (&Vcharset_japanese_jisx0208_1978);
    Vcharset_japanese_jisx0208_1978 =
-    make_charset (LEADING_BYTE_JAPANESE_JISX0208_1978, Qjapanese_jisx0208_1978, 3,
+    make_charset (LEADING_BYTE_JAPANESE_JISX0208_1978, Qjapanese_jisx0208_1978,
                   CHARSET_TYPE_94X94, 2, 0, '@',
                   CHARSET_LEFT_TO_RIGHT,
                   build_string ("JISX0208.1978"),
                   build_string ("JISX0208.1978 (Japanese)"),
                   build_string
                   ("JISX0208.1978 Japanese Kanji (so called \"old JIS\")"),
-                 build_string ("\\(jisx0208\\|jisc6226\\)\\.1978"));
+                 build_string ("\\(jisx0208\\|jisc6226\\)\\.1978"),
+                 NULL, 0, 0, 33);
    staticpro (&Vcharset_chinese_gb2312);
    Vcharset_chinese_gb2312 =
-    make_charset (LEADING_BYTE_CHINESE_GB2312, Qchinese_gb2312, 3,
+    make_charset (LEADING_BYTE_CHINESE_GB2312, Qchinese_gb2312,
                   CHARSET_TYPE_94X94, 2, 0, 'A',
                   CHARSET_LEFT_TO_RIGHT,
                   build_string ("GB2312"),
                   build_string ("GB2312)"),
                   build_string ("GB2312 Chinese simplified"),
-                 build_string ("gb2312"));
+                 build_string ("gb2312"),
+                 NULL, 0, 0, 33);
    staticpro (&Vcharset_japanese_jisx0208);
    Vcharset_japanese_jisx0208 =
-    make_charset (LEADING_BYTE_JAPANESE_JISX0208, Qjapanese_jisx0208, 3,
+    make_charset (LEADING_BYTE_JAPANESE_JISX0208, Qjapanese_jisx0208,
                   CHARSET_TYPE_94X94, 2, 0, 'B',
                   CHARSET_LEFT_TO_RIGHT,
                   build_string ("JISX0208"),
                   build_string ("JISX0208.1983/1990 (Japanese)"),
                   build_string ("JISX0208.1983/1990 Japanese Kanji"),
-                 build_string ("jisx0208.19\\(83\\|90\\)"));
+                 build_string ("jisx0208.19\\(83\\|90\\)"),
+                 NULL, 0, 0, 33);
    staticpro (&Vcharset_korean_ksc5601);
    Vcharset_korean_ksc5601 =
-    make_charset (LEADING_BYTE_KOREAN_KSC5601, Qkorean_ksc5601, 3,
+    make_charset (LEADING_BYTE_KOREAN_KSC5601, Qkorean_ksc5601,
                   CHARSET_TYPE_94X94, 2, 0, 'C',
                   CHARSET_LEFT_TO_RIGHT,
                   build_string ("KSC5601"),
                   build_string ("KSC5601 (Korean"),
                   build_string ("KSC5601 Korean Hangul and Hanja"),
-                 build_string ("ksc5601"));
+                 build_string ("ksc5601"),
+                 NULL, 0, 0, 33);
    staticpro (&Vcharset_japanese_jisx0212);
    Vcharset_japanese_jisx0212 =
-    make_charset (LEADING_BYTE_JAPANESE_JISX0212, Qjapanese_jisx0212, 3,
+    make_charset (LEADING_BYTE_JAPANESE_JISX0212, Qjapanese_jisx0212,
                   CHARSET_TYPE_94X94, 2, 0, 'D',
                   CHARSET_LEFT_TO_RIGHT,
                   build_string ("JISX0212"),
                   build_string ("JISX0212 (Japanese)"),
                   build_string ("JISX0212 Japanese Supplement"),
-                 build_string ("jisx0212"));
+                 build_string ("jisx0212"),
+                 NULL, 0, 0, 33);
  
  #define CHINESE_CNS_PLANE_RE(n) "cns11643[.-]\\(.*[.-]\\)?" n "$"
    staticpro (&Vcharset_chinese_cns11643_1);
    Vcharset_chinese_cns11643_1 =
-    make_charset (LEADING_BYTE_CHINESE_CNS11643_1, Qchinese_cns11643_1, 3,
+    make_charset (LEADING_BYTE_CHINESE_CNS11643_1, Qchinese_cns11643_1,
                   CHARSET_TYPE_94X94, 2, 0, 'G',
                   CHARSET_LEFT_TO_RIGHT,
                   build_string ("CNS11643-1"),
                   build_string ("CNS11643-1 (Chinese traditional)"),
                   build_string
                   ("CNS 11643 Plane 1 Chinese traditional"),
-                 build_string (CHINESE_CNS_PLANE_RE("1")));
+                 build_string (CHINESE_CNS_PLANE_RE("1")),
+                 NULL, 0, 0, 33);
    staticpro (&Vcharset_chinese_cns11643_2);
    Vcharset_chinese_cns11643_2 =
-    make_charset (LEADING_BYTE_CHINESE_CNS11643_2, Qchinese_cns11643_2, 3,
+    make_charset (LEADING_BYTE_CHINESE_CNS11643_2, Qchinese_cns11643_2,
                   CHARSET_TYPE_94X94, 2, 0, 'H',
                   CHARSET_LEFT_TO_RIGHT,
                   build_string ("CNS11643-2"),
                   build_string ("CNS11643-2 (Chinese traditional)"),
                   build_string
                   ("CNS 11643 Plane 2 Chinese traditional"),
-                 build_string (CHINESE_CNS_PLANE_RE("2")));
+                 build_string (CHINESE_CNS_PLANE_RE("2")),
+                 NULL, 0, 0, 33);
  #ifdef UTF2000
    staticpro (&Vcharset_chinese_cns11643_3);
    Vcharset_chinese_cns11643_3 =
-    make_charset (LEADING_BYTE_CHINESE_CNS11643_3, Qchinese_cns11643_3, 3,
+    make_charset (LEADING_BYTE_CHINESE_CNS11643_3, Qchinese_cns11643_3,
                   CHARSET_TYPE_94X94, 2, 0, 'I',
                   CHARSET_LEFT_TO_RIGHT,
                   build_string ("CNS11643-3"),
                   build_string ("CNS11643-3 (Chinese traditional)"),
                   build_string
                   ("CNS 11643 Plane 3 Chinese traditional"),
-                 build_string (CHINESE_CNS_PLANE_RE("3")));
+                 build_string (CHINESE_CNS_PLANE_RE("3")),
+                 NULL, 0, 0, 33);
    staticpro (&Vcharset_chinese_cns11643_4);
    Vcharset_chinese_cns11643_4 =
-    make_charset (LEADING_BYTE_CHINESE_CNS11643_4, Qchinese_cns11643_4, 3,
+    make_charset (LEADING_BYTE_CHINESE_CNS11643_4, Qchinese_cns11643_4,
                   CHARSET_TYPE_94X94, 2, 0, 'J',
                   CHARSET_LEFT_TO_RIGHT,
                   build_string ("CNS11643-4"),
                   build_string ("CNS11643-4 (Chinese traditional)"),
                   build_string
                   ("CNS 11643 Plane 4 Chinese traditional"),
-                 build_string (CHINESE_CNS_PLANE_RE("4")));
+                 build_string (CHINESE_CNS_PLANE_RE("4")),
+                 NULL, 0, 0, 33);
    staticpro (&Vcharset_chinese_cns11643_5);
    Vcharset_chinese_cns11643_5 =
-    make_charset (LEADING_BYTE_CHINESE_CNS11643_5, Qchinese_cns11643_5, 3,
+    make_charset (LEADING_BYTE_CHINESE_CNS11643_5, Qchinese_cns11643_5,
                   CHARSET_TYPE_94X94, 2, 0, 'K',
                   CHARSET_LEFT_TO_RIGHT,
                   build_string ("CNS11643-5"),
                   build_string ("CNS11643-5 (Chinese traditional)"),
                   build_string
                   ("CNS 11643 Plane 5 Chinese traditional"),
-                 build_string (CHINESE_CNS_PLANE_RE("5")));
+                 build_string (CHINESE_CNS_PLANE_RE("5")),
+                 NULL, 0, 0, 33);
    staticpro (&Vcharset_chinese_cns11643_6);
    Vcharset_chinese_cns11643_6 =
-    make_charset (LEADING_BYTE_CHINESE_CNS11643_6, Qchinese_cns11643_6, 3,
+    make_charset (LEADING_BYTE_CHINESE_CNS11643_6, Qchinese_cns11643_6,
                   CHARSET_TYPE_94X94, 2, 0, 'L',
                   CHARSET_LEFT_TO_RIGHT,
                   build_string ("CNS11643-6"),
                   build_string ("CNS11643-6 (Chinese traditional)"),
                   build_string
                   ("CNS 11643 Plane 6 Chinese traditional"),
-                 build_string (CHINESE_CNS_PLANE_RE("6")));
+                 build_string (CHINESE_CNS_PLANE_RE("6")),
+                 NULL, 0, 0, 33);
    staticpro (&Vcharset_chinese_cns11643_7);
    Vcharset_chinese_cns11643_7 =
-    make_charset (LEADING_BYTE_CHINESE_CNS11643_7, Qchinese_cns11643_7, 3,
+    make_charset (LEADING_BYTE_CHINESE_CNS11643_7, Qchinese_cns11643_7,
                   CHARSET_TYPE_94X94, 2, 0, 'M',
                   CHARSET_LEFT_TO_RIGHT,
                   build_string ("CNS11643-7"),
                   build_string ("CNS11643-7 (Chinese traditional)"),
                   build_string
                   ("CNS 11643 Plane 7 Chinese traditional"),
-                 build_string (CHINESE_CNS_PLANE_RE("7")));
+                 build_string (CHINESE_CNS_PLANE_RE("7")),
+                 NULL, 0, 0, 33);
+  staticpro (&Vcharset_latin_viscii_lower);
    Vcharset_latin_viscii_lower =
-    make_charset (LEADING_BYTE_LATIN_VISCII_LOWER, Qlatin_viscii_lower, 2,
+    make_charset (LEADING_BYTE_LATIN_VISCII_LOWER, Qlatin_viscii_lower,
                   CHARSET_TYPE_96, 1, 1, '1',
                   CHARSET_LEFT_TO_RIGHT,
                   build_string ("VISCII lower"),
                   build_string ("VISCII lower (Vietnamese)"),
                   build_string ("VISCII lower (Vietnamese)"),
-                 build_string ("VISCII1.1"));
+                 build_string ("VISCII1.1"),
+                 latin_viscii_lower_to_ucs, 0, 0, 32);
+  staticpro (&Vcharset_latin_viscii_upper);
    Vcharset_latin_viscii_upper =
-    make_charset (LEADING_BYTE_LATIN_VISCII_UPPER, Qlatin_viscii_upper, 2,
+    make_charset (LEADING_BYTE_LATIN_VISCII_UPPER, Qlatin_viscii_upper,
                   CHARSET_TYPE_96, 1, 1, '2',
                   CHARSET_LEFT_TO_RIGHT,
                   build_string ("VISCII upper"),
                   build_string ("VISCII upper (Vietnamese)"),
                   build_string ("VISCII upper (Vietnamese)"),
-                 build_string ("VISCII1.1"));
+                 build_string ("VISCII1.1"),
+                 latin_viscii_upper_to_ucs, 0, 0, 32);
  #endif
    staticpro (&Vcharset_chinese_big5_1);
    Vcharset_chinese_big5_1 =
-    make_charset (LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1, 3,
+    make_charset (LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1,
                   CHARSET_TYPE_94X94, 2, 0, '0',
                   CHARSET_LEFT_TO_RIGHT,
                   build_string ("Big5"),
                   build_string ("Big5 (Level-1)"),
                   build_string
                   ("Big5 Level-1 Chinese traditional"),
-                 build_string ("big5"));
+                 build_string ("big5"),
+                 NULL, 0, 0, 33);
    staticpro (&Vcharset_chinese_big5_2);
    Vcharset_chinese_big5_2 =
-    make_charset (LEADING_BYTE_CHINESE_BIG5_2, Qchinese_big5_2, 3,
+    make_charset (LEADING_BYTE_CHINESE_BIG5_2, Qchinese_big5_2,
                   CHARSET_TYPE_94X94, 2, 0, '1',
                   CHARSET_LEFT_TO_RIGHT,
                   build_string ("Big5"),
                   build_string ("Big5 (Level-2)"),
                   build_string
                   ("Big5 Level-2 Chinese traditional"),
-                 build_string ("big5"));
+                 build_string ("big5"),
+                 NULL, 0, 0, 33);
  
  #ifdef ENABLE_COMPOSITE_CHARS
    /* #### For simplicity, we put composite chars into a 96x96 charset.
@@ -3106,7 +2867,7 @@ complex_vars_of_mule_charset (void)
       room, esp. as we don't yet recycle numbers. */
    staticpro (&Vcharset_composite);
    Vcharset_composite =
-    make_charset (LEADING_BYTE_COMPOSITE, Qcomposite, 3,
+    make_charset (LEADING_BYTE_COMPOSITE, Qcomposite,
                   CHARSET_TYPE_96X96, 2, 0, 0,
                   CHARSET_LEFT_TO_RIGHT,
                   build_string ("Composite"),
diff --git a/src/objects-tty.c b/src/objects-tty.c

index d8f1a34..ea76e6f 100644 (file)
--- a/src/objects-tty.c
+++ b/src/objects-tty.c
@@ -242,7 +242,7 @@ tty_initialize_font_instance (Lisp_Font_Instance *f, Lisp_Object name,
    FONT_INSTANCE_TTY_CHARSET (f) = charset;
  #ifdef MULE
    if (CHARSETP (charset))
-    f->width = XCHARSET_COLUMNS (charset);
+    f->width = CHARSET_COLUMNS (XCHARSET (charset));
    else
  #endif
      f->width = 1;
diff --git a/src/text-coding.c b/src/text-coding.c

index 8b20b52..3a8c3fe 100644 (file)
--- a/src/text-coding.c
+++ b/src/text-coding.c
@@ -186,8 +186,11 @@ static int detect_coding_sjis (struct detection_state *st,
                                const Extbyte *src, Lstream_data_count n);
  static void decode_coding_sjis (Lstream *decoding, const Extbyte *src,
                                 unsigned_char_dynarr *dst, Lstream_data_count n);
-static void encode_coding_sjis (Lstream *encoding, const Bufbyte *src,
-                               unsigned_char_dynarr *dst, Lstream_data_count n);
+void char_encode_shift_jis (struct encoding_stream *str, Emchar c,
+                           unsigned_char_dynarr *dst, unsigned int *flags);
+void char_finish_shift_jis (struct encoding_stream *str,
+                           unsigned_char_dynarr *dst, unsigned int *flags);
+
  static int detect_coding_big5 (struct detection_state *st,
                                const Extbyte *src, Lstream_data_count n);
  static void decode_coding_big5 (Lstream *decoding, const Extbyte *src,
@@ -198,8 +201,11 @@ static int detect_coding_ucs4 (struct detection_state *st,
                                const Extbyte *src, Lstream_data_count n);
  static void decode_coding_ucs4 (Lstream *decoding, const Extbyte *src,
                                 unsigned_char_dynarr *dst, Lstream_data_count n);
-static void encode_coding_ucs4 (Lstream *encoding, const Bufbyte *src,
-                               unsigned_char_dynarr *dst, Lstream_data_count n);
+void char_encode_ucs4 (struct encoding_stream *str, Emchar c,
+                      unsigned_char_dynarr *dst, unsigned int *flags);
+void char_finish_ucs4 (struct encoding_stream *str,
+                      unsigned_char_dynarr *dst, unsigned int *flags);
+
  static int detect_coding_utf8 (struct detection_state *st,
                                const Extbyte *src, Lstream_data_count n);
  static void decode_coding_utf8 (Lstream *decoding, const Extbyte *src,
@@ -904,10 +910,6 @@ if TYPE is 'ccl:
      CHECK_STRING (doc_string);
    CODING_SYSTEM_DOC_STRING (codesys) = doc_string;
  
-#ifdef UTF2000
-  if (ty == CODESYS_NO_CONVERSION)
-    codesys->fixed.size = 1;
-#endif
    {
      EXTERNAL_PROPERTY_LIST_LOOP_3 (key, value, props)
        {
@@ -2817,6 +2819,15 @@ reset_encoding_stream (struct encoding_stream *str)
      case CODESYS_UTF8:
        str->encode_char = &char_encode_utf8;
        str->finish = &char_finish_utf8;
+      break;
+    case CODESYS_UCS4:
+      str->encode_char = &char_encode_ucs4;
+      str->finish = &char_finish_ucs4;
+      break;
+    case CODESYS_SHIFT_JIS:
+      str->encode_char = &char_encode_shift_jis;
+      str->finish = &char_finish_shift_jis;
+      break;
      default:
        break;
      }
@@ -2932,15 +2943,9 @@ mule_encode (Lstream *encoding, const Bufbyte *src,
        encode_coding_no_conversion (encoding, src, dst, n);
        break;
  #ifdef MULE
-    case CODESYS_SHIFT_JIS:
-      encode_coding_sjis (encoding, src, dst, n);
-      break;
      case CODESYS_BIG5:
        encode_coding_big5 (encoding, src, dst, n);
        break;
-    case CODESYS_UCS4:
-      encode_coding_ucs4 (encoding, src, dst, n);
-      break;
      case CODESYS_CCL:
        str->ccl.last_block = str->flags & CODING_STATE_END;
        /* When applying ccl program to stream, MUST NOT set NULL
@@ -3030,64 +3035,57 @@ text_encode_generic (Lstream *encoding, const Bufbyte *src,
    struct encoding_stream *str = ENCODING_STREAM_DATA (encoding);
    unsigned int flags          = str->flags;
    Emchar ch                   = str->ch;
-  Lisp_Object charset;
-  int half;
  
    char_boundary = str->iso2022.current_char_boundary;
-  charset = str->iso2022.current_charset;
-  half = str->iso2022.current_half;
  
    while (n--)
      {
        c = *src++;
  
-      switch (char_boundary)
+      if (char_boundary == 0)
         {
-       case 0:
-         if ( c >= 0xfc )
+         if (c >= 0xfc)
             {
               ch = c & 0x01;
               char_boundary = 5;
             }
-         else if ( c >= 0xf8 )
+         else if (c >= 0xf8)
             {
               ch = c & 0x03;
               char_boundary = 4;
             }
-         else if ( c >= 0xf0 )
+         else if (c >= 0xf0)
             {
               ch = c & 0x07;
               char_boundary = 3;
             }
-         else if ( c >= 0xe0 )
+         else if (c >= 0xe0)
             {
               ch = c & 0x0f;
               char_boundary = 2;
             }
-         else if ( c >= 0xc0 )
+         else if (c >= 0xc0)
             {
               ch = c & 0x1f;
               char_boundary = 1;
             }
           else
-           {
-             (*str->encode_char) (str, c, dst, &flags);
-             ch = 0;
-             char_boundary = 0;
-           }
-         break;
-       case 1:
+           (*str->encode_char) (str, c, dst, &flags);
+       }
+      else if (char_boundary == 1)
+       {
           (*str->encode_char) (str, (ch << 6) | (c & 0x3f), dst, &flags);
           ch =0;
           char_boundary = 0;
-         break;
-       default:
-         ch = ( ch << 6 ) | ( c & 0x3f );
+       }
+      else
+       {
+         ch = (ch << 6) | (c & 0x3f);
           char_boundary--;
         }
      }
  
-  if ( (char_boundary == 0) && flags & CODING_STATE_END)
+  if ((char_boundary == 0) && (flags & CODING_STATE_END))
      {
        (*str->finish) (str, dst, &flags);
      }
@@ -3095,10 +3093,6 @@ text_encode_generic (Lstream *encoding, const Bufbyte *src,
    str->flags = flags;
    str->ch    = ch;
    str->iso2022.current_char_boundary = char_boundary;
-  str->iso2022.current_charset = charset;
-  str->iso2022.current_half = half;
-
-  /* Verbum caro factum est! */
  }
  
  \f
@@ -3212,6 +3206,11 @@ decode_coding_sjis (Lstream *decoding, const Extbyte *src,
               Dynarr_add (dst, c);
  #endif
             }
+#ifdef UTF2000
+         else if (c > 32)
+           DECODE_ADD_UCS_CHAR(MAKE_CHAR(Vcharset_latin_jisx0201,
+                                         c, 0), dst);
+#endif
           else
             DECODE_ADD_BINARY_CHAR (c, dst);
         }
@@ -3224,136 +3223,61 @@ decode_coding_sjis (Lstream *decoding, const Extbyte *src,
    str->ch    = ch;
  }
  
-/* Convert internally-formatted data to Shift-JIS. */
+/* Convert internal character representation to Shift_JIS. */
  
-static void
-encode_coding_sjis (Lstream *encoding, const Bufbyte *src,
-                   unsigned_char_dynarr *dst, Lstream_data_count n)
+void
+char_encode_shift_jis (struct encoding_stream *str, Emchar ch,
+                      unsigned_char_dynarr *dst, unsigned int *flags)
  {
-  struct encoding_stream *str = ENCODING_STREAM_DATA (encoding);
-  unsigned int flags  = str->flags;
-  unsigned int ch     = str->ch;
    eol_type_t eol_type = CODING_SYSTEM_EOL_TYPE (str->codesys);
-#ifdef UTF2000
-  unsigned char char_boundary = str->iso2022.current_char_boundary;
-#endif
  
-  while (n--)
+  if (ch == '\n')
      {
-      Bufbyte c = *src++;
+      if (eol_type != EOL_LF && eol_type != EOL_AUTODETECT)
+       Dynarr_add (dst, '\r');
+      if (eol_type != EOL_CR)
+       Dynarr_add (dst, ch);
+    }
+  else
+    {
+      Lisp_Object charset;
+      unsigned int c1, c2, s1, s2;
+      
  #ifdef UTF2000
-      switch (char_boundary)
+      if ( (c1 =
+           get_byte_from_character_table (ch, Vcharset_latin_jisx0201))
+          >= 0 )
         {
-       case 0:
-         if ( c >= 0xfc )
-           {
-             ch = c & 0x01;
-             char_boundary = 5;
-           }
-         else if ( c >= 0xf8 )
-           {
-             ch = c & 0x03;
-             char_boundary = 4;
-           }
-         else if ( c >= 0xf0 )
-           {
-             ch = c & 0x07;
-             char_boundary = 3;
-           }
-         else if ( c >= 0xe0 )
-           {
-             ch = c & 0x0f;
-             char_boundary = 2;
-           }
-         else if ( c >= 0xc0 )
-           {
-             ch = c & 0x1f;
-             char_boundary = 1;
-           }
-         else
-           {
-             ch = 0;
-             if (c == '\n')
-               {
-                 if (eol_type != EOL_LF && eol_type != EOL_AUTODETECT)
-                   Dynarr_add (dst, '\r');
-                 if (eol_type != EOL_CR)
-                   Dynarr_add (dst, c);
-               }
-             else
-               Dynarr_add (dst, c);
-             char_boundary = 0;
-           }
-         break;
-       case 1:
-         ch = ( ch << 6 ) | ( c & 0x3f );
-         {
-           Lisp_Object charset;
-           unsigned int c1, c2, s1, s2;
-           
-           BREAKUP_CHAR (ch, charset, c1, c2);
-           if (EQ(charset, Vcharset_katakana_jisx0201))
-             {
-               Dynarr_add (dst, c1 | 0x80);
-             }
-           else if (EQ(charset, Vcharset_japanese_jisx0208))
-             {
-               ENCODE_SJIS (c1 | 0x80, c2 | 0x80, s1, s2);
-               Dynarr_add (dst, s1);
-               Dynarr_add (dst, s2);
-             }
-         }
-         char_boundary = 0;
-         break;
-       default:
-         ch = ( ch << 6 ) | ( c & 0x3f );
-         char_boundary--;
+         charset = Vcharset_latin_jisx0201;
+         c2 = 0;
         }
-#else
-      if (c == '\n')
+      else
+#endif
+       BREAKUP_CHAR (ch, charset, c1, c2);
+         
+      if (EQ(charset, Vcharset_katakana_jisx0201))
         {
-         if (eol_type != EOL_LF && eol_type != EOL_AUTODETECT)
-           Dynarr_add (dst, '\r');
-         if (eol_type != EOL_CR)
-           Dynarr_add (dst, '\n');
-         ch = 0;
+         Dynarr_add (dst, c1 | 0x80);
         }
-      else if (BYTE_ASCII_P (c))
+      else if (c2 == 0)
         {
-         Dynarr_add (dst, c);
-         ch = 0;
+         Dynarr_add (dst, c1);
         }
-      else if (BUFBYTE_LEADING_BYTE_P (c))
-       ch = (c == LEADING_BYTE_KATAKANA_JISX0201 ||
-             c == LEADING_BYTE_JAPANESE_JISX0208_1978 ||
-             c == LEADING_BYTE_JAPANESE_JISX0208) ? c : 0;
-      else if (ch)
+      else if (EQ(charset, Vcharset_japanese_jisx0208))
         {
-         if (ch == LEADING_BYTE_KATAKANA_JISX0201)
-           {
-             Dynarr_add (dst, c);
-             ch = 0;
-           }
-         else if (ch == LEADING_BYTE_JAPANESE_JISX0208_1978 ||
-                  ch == LEADING_BYTE_JAPANESE_JISX0208)
-           ch = c;
-         else
-           {
-             unsigned char j1, j2;
-             ENCODE_SJIS (ch, c, j1, j2);
-             Dynarr_add (dst, j1);
-             Dynarr_add (dst, j2);
-             ch = 0;
-           }
+         ENCODE_SJIS (c1 | 0x80, c2 | 0x80, s1, s2);
+         Dynarr_add (dst, s1);
+         Dynarr_add (dst, s2);
         }
-#endif
+      else
+       Dynarr_add (dst, '?');
      }
+}
  
-  str->flags = flags;
-  str->ch    = ch;
-#ifdef UTF2000
-  str->iso2022.current_char_boundary = char_boundary;
-#endif
+void
+char_finish_shift_jis (struct encoding_stream *str, unsigned_char_dynarr *dst,
+                      unsigned int *flags)
+{
  }
  
  DEFUN ("decode-shift-jis-char", Fdecode_shift_jis_char, 1, 1, 0, /*
@@ -3692,164 +3616,8 @@ Return the corresponding character code in Big5.
  \f
  /************************************************************************/
  /*                           UCS-4 methods                              */
-/*                                                                      */
-/*  UCS-4 character codes are implemented as nonnegative integers.      */
-/*                                                                      */
  /************************************************************************/
  
-
-DEFUN ("set-ucs-char", Fset_ucs_char, 2, 2, 0, /*
-Map UCS-4 code CODE to Mule character CHARACTER.
-
-Return T on success, NIL on failure.
-*/
-       (code, character))
-{
-  size_t c;
-
-  CHECK_CHAR (character);
-  CHECK_NATNUM (code);
-  c = XINT (code);
-
-  if (c < countof (fcd->ucs_to_mule_table))
-    {
-      fcd->ucs_to_mule_table[c] = character;
-      return Qt;
-    }
-  else
-    return Qnil;
-}
-
-static Lisp_Object
-ucs_to_char (unsigned long code)
-{
-  if (code < countof (fcd->ucs_to_mule_table))
-    {
-      return fcd->ucs_to_mule_table[code];
-    }
-  else if ((0xe00000 <= code) && (code <= 0xe00000 + 94 * 94 * 14))
-    {
-      unsigned int c;
-
-      code -= 0xe00000;
-      c = code % (94 * 94);
-      return make_char
-       (MAKE_CHAR (CHARSET_BY_ATTRIBUTES
-                   (CHARSET_TYPE_94X94, code / (94 * 94) + '@',
-                    CHARSET_LEFT_TO_RIGHT),
-                   c / 94 + 33, c % 94 + 33));
-    }
-  else
-    return Qnil;
-}
-
-DEFUN ("ucs-char", Fucs_char, 1, 1, 0, /*
-Return Mule character corresponding to UCS code CODE (a positive integer).
-*/
-       (code))
-{
-  CHECK_NATNUM (code);
-  return ucs_to_char (XINT (code));
-}
-
-DEFUN ("set-char-ucs", Fset_char_ucs, 2, 2, 0, /*
-Map Mule character CHARACTER to UCS code CODE (a positive integer).
-*/
-       (character, code))
-{
-  /* #### Isn't this gilding the lily?  Fput_char_table checks its args.
-          Fset_char_ucs is more restrictive on index arg, but should
-          check code arg in a char_table method. */
-  CHECK_CHAR (character);
-  CHECK_NATNUM (code);
-  return Fput_char_table (character, code, mule_to_ucs_table);
-}
-
-DEFUN ("char-ucs", Fchar_ucs, 1, 1, 0, /*
-Return the UCS code (a positive integer) corresponding to CHARACTER.
-*/
-       (character))
-{
-  return Fget_char_table (character, mule_to_ucs_table);
-}
-
-#ifdef UTF2000
-#define decode_ucs4 DECODE_ADD_UCS_CHAR
-#else
-/* Decode a UCS-4 character into a buffer.  If the lookup fails, use
-   <GETA MARK> (U+3013) of JIS X 0208, which means correct character
-   is not found, instead.
-   #### do something more appropriate (use blob?)
-        Danger, Will Robinson!  Data loss.  Should we signal user? */
-static void
-decode_ucs4 (unsigned long ch, unsigned_char_dynarr *dst)
-{
-  Lisp_Object chr = ucs_to_char (ch);
-
-  if (! NILP (chr))
-    {
-      Bufbyte work[MAX_EMCHAR_LEN];
-      int len;
-
-      ch = XCHAR (chr);
-      len = (ch < 128) ?
-       simple_set_charptr_emchar (work, ch) :
-       non_ascii_set_charptr_emchar (work, ch);
-      Dynarr_add_many (dst, work, len);
-    }
-  else
-    {
-      Dynarr_add (dst, LEADING_BYTE_JAPANESE_JISX0208);
-      Dynarr_add (dst, 34 + 128);
-      Dynarr_add (dst, 46 + 128);
-    }
-}
-#endif
-
-static unsigned long
-mule_char_to_ucs4 (Lisp_Object charset,
-                  unsigned char h, unsigned char l)
-{
-  Lisp_Object code
-    = Fget_char_table (make_char (MAKE_CHAR (charset, h & 127, l & 127)),
-                      mule_to_ucs_table);
-
-  if (INTP (code))
-    {
-      return XINT (code);
-    }
-  else if ( (XCHARSET_DIMENSION (charset) == 2) &&
-           (XCHARSET_CHARS (charset) == 94) )
-    {
-      unsigned char final = XCHARSET_FINAL (charset);
-
-      if ( ('@' <= final) && (final < 0x7f) )
-       {
-         return 0xe00000 + (final - '@') * 94 * 94
-           + ((h & 127) - 33) * 94 + (l & 127) - 33;
-       }
-      else
-       {
-         return '?';
-       }
-    }
-  else
-    {
-      return '?';
-    }
-}
-
-static void
-encode_ucs4 (Lisp_Object charset,
-            unsigned char h, unsigned char l, unsigned_char_dynarr *dst)
-{
-  unsigned long code = mule_char_to_ucs4 (charset, h, l);
-  Dynarr_add (dst,  code >> 24);
-  Dynarr_add (dst, (code >> 16) & 255);
-  Dynarr_add (dst, (code >>  8) & 255);
-  Dynarr_add (dst,  code        & 255);
-}
-
  static int
  detect_coding_ucs4 (struct detection_state *st, const Extbyte *src, Lstream_data_count n)
  {
@@ -3893,7 +3661,7 @@ decode_coding_ucs4 (Lstream *decoding, const Extbyte *src,
           counter = 3;
           break;
         case 1:
-         decode_ucs4 ( ( ch << 8 ) | c, dst);
+         DECODE_ADD_UCS_CHAR ((ch << 8) | c, dst);
           ch = 0;
           counter = 0;
           break;
@@ -3910,140 +3678,20 @@ decode_coding_ucs4 (Lstream *decoding, const Extbyte *src,
    str->counter = counter;
  }
  
-static void
-encode_coding_ucs4 (Lstream *encoding, const Bufbyte *src,
-                   unsigned_char_dynarr *dst, Lstream_data_count n)
+void
+char_encode_ucs4 (struct encoding_stream *str, Emchar ch,
+                 unsigned_char_dynarr *dst, unsigned int *flags)
  {
-#ifndef UTF2000
-  struct encoding_stream *str = ENCODING_STREAM_DATA (encoding);
-  unsigned int flags = str->flags;
-  unsigned int ch = str->ch;
-  unsigned char char_boundary = str->iso2022.current_char_boundary;
-  Lisp_Object charset = str->iso2022.current_charset;
-
-#ifdef ENABLE_COMPOSITE_CHARS
-  /* flags for handling composite chars.  We do a little switcharoo
-     on the source while we're outputting the composite char. */
-  unsigned int saved_n = 0;
-  const unsigned char *saved_src = NULL;
-  int in_composite = 0;
-
- back_to_square_n:
-#endif
-
-  while (n--)
-    {
-      unsigned char c = *src++;
-
-      if (BYTE_ASCII_P (c))
-       {               /* Processing ASCII character */
-         ch = 0;
-         encode_ucs4 (Vcharset_ascii, c, 0, dst);
-         char_boundary = 1;
-       }
-      else if (BUFBYTE_LEADING_BYTE_P (c) || BUFBYTE_LEADING_BYTE_P (ch))
-       { /* Processing Leading Byte */
-         ch = 0;
-         charset = CHARSET_BY_LEADING_BYTE (c);
-         if (LEADING_BYTE_PREFIX_P(c))
-           ch = c;
-         char_boundary = 0;
-       }
-      else
-       {                       /* Processing Non-ASCII character */
-         char_boundary = 1;
-         if (EQ (charset, Vcharset_control_1))
-           {
-             encode_ucs4 (Vcharset_control_1, c, 0, dst);
-           }
-         else
-           {
-             switch (XCHARSET_REP_BYTES (charset))
-               {
-               case 2:
-                 encode_ucs4 (charset, c, 0, dst);
-                 break;
-               case 3:
-                 if (XCHARSET_PRIVATE_P (charset))
-                   {
-                     encode_ucs4 (charset, c, 0, dst);
-                     ch = 0;
-                   }
-                 else if (ch)
-                   {
-#ifdef ENABLE_COMPOSITE_CHARS
-                     if (EQ (charset, Vcharset_composite))
-                       {
-                         if (in_composite)
-                           {
-                             /* #### Bother! We don't know how to
-                                handle this yet. */
-                             Dynarr_add (dst, '\0');
-                             Dynarr_add (dst, '\0');
-                             Dynarr_add (dst, '\0');
-                             Dynarr_add (dst, '~');
-                           }
-                         else
-                           {
-                             Emchar emch = MAKE_CHAR (Vcharset_composite,
-                                                      ch & 0x7F, c & 0x7F);
-                             Lisp_Object lstr = composite_char_string (emch);
-                             saved_n = n;
-                             saved_src = src;
-                             in_composite = 1;
-                             src = XSTRING_DATA   (lstr);
-                             n   = XSTRING_LENGTH (lstr);
-                           }
-                       }
-                     else
-#endif /* ENABLE_COMPOSITE_CHARS */
-                       {
-                         encode_ucs4(charset, ch, c, dst);
-                       }
-                     ch = 0;
-                   }
-                 else
-                   {
-                     ch = c;
-                     char_boundary = 0;
-                   }
-                 break;
-               case 4:
-                 if (ch)
-                   {
-                     encode_ucs4 (charset, ch, c, dst);
-                     ch = 0;
-                   }
-                 else
-                   {
-                     ch = c;
-                     char_boundary = 0;
-                   }
-                 break;
-               default:
-                 abort ();
-               }
-           }
-       }
-    }
-
-#ifdef ENABLE_COMPOSITE_CHARS
-  if (in_composite)
-    {
-      n = saved_n;
-      src = saved_src;
-      in_composite = 0;
-      goto back_to_square_n; /* Wheeeeeeeee ..... */
-    }
-#endif /* ENABLE_COMPOSITE_CHARS */
-
-  str->flags = flags;
-  str->ch = ch;
-  str->iso2022.current_char_boundary = char_boundary;
-  str->iso2022.current_charset = charset;
+  Dynarr_add (dst, ch >> 24);
+  Dynarr_add (dst, ch >> 16);
+  Dynarr_add (dst, ch >>  8);
+  Dynarr_add (dst, ch      );
+}
  
-  /* Verbum caro factum est! */
-#endif
+void
+char_finish_ucs4 (struct encoding_stream *str, unsigned_char_dynarr *dst,
+                 unsigned int *flags)
+{
  }
  
  \f
@@ -4129,12 +3777,12 @@ decode_coding_utf8 (Lstream *decoding, const Extbyte *src,
           else
             {
               DECODE_HANDLE_EOL_TYPE (eol_type, c, flags, dst);
-             decode_ucs4 (c, dst);
+             DECODE_ADD_UCS_CHAR (c, dst);
             }
           break;
         case 1:
           ch = ( ch << 6 ) | ( c & 0x3f );
-         decode_ucs4 (ch, dst);
+         DECODE_ADD_UCS_CHAR (ch, dst);
           ch = 0;
           counter = 0;
           break;
@@ -4154,47 +3802,56 @@ decode_coding_utf8 (Lstream *decoding, const Extbyte *src,
  }
  
  void
-char_encode_utf8 (struct encoding_stream *str, Emchar code,
+char_encode_utf8 (struct encoding_stream *str, Emchar ch,
                   unsigned_char_dynarr *dst, unsigned int *flags)
  {
-  if ( code <= 0x7f )
+  eol_type_t eol_type = CODING_SYSTEM_EOL_TYPE (str->codesys);
+
+  if (ch == '\n')
+    {
+      if (eol_type != EOL_LF && eol_type != EOL_AUTODETECT)
+       Dynarr_add (dst, '\r');
+      if (eol_type != EOL_CR)
+       Dynarr_add (dst, ch);
+    }
+  else if (ch <= 0x7f)
      {
-      Dynarr_add (dst, code);
+      Dynarr_add (dst, ch);
      }
-  else if ( code <= 0x7ff )
+  else if (ch <= 0x7ff)
      {
-      Dynarr_add (dst, (code >> 6) | 0xc0);
-      Dynarr_add (dst, (code & 0x3f) | 0x80);
+      Dynarr_add (dst, (ch >> 6) | 0xc0);
+      Dynarr_add (dst, (ch & 0x3f) | 0x80);
      }
-  else if ( code <= 0xffff )
+  else if (ch <= 0xffff)
      {
-      Dynarr_add (dst,  (code >> 12) | 0xe0);
-      Dynarr_add (dst, ((code >>  6) & 0x3f) | 0x80);
-      Dynarr_add (dst,  (code        & 0x3f) | 0x80);
+      Dynarr_add (dst,  (ch >> 12) | 0xe0);
+      Dynarr_add (dst, ((ch >>  6) & 0x3f) | 0x80);
+      Dynarr_add (dst,  (ch        & 0x3f) | 0x80);
      }
-  else if ( code <= 0x1fffff )
+  else if (ch <= 0x1fffff)
      {
-      Dynarr_add (dst,  (code >> 18) | 0xf0);
-      Dynarr_add (dst, ((code >> 12) & 0x3f) | 0x80);
-      Dynarr_add (dst, ((code >>  6) & 0x3f) | 0x80);
-      Dynarr_add (dst,  (code        & 0x3f) | 0x80);
+      Dynarr_add (dst,  (ch >> 18) | 0xf0);
+      Dynarr_add (dst, ((ch >> 12) & 0x3f) | 0x80);
+      Dynarr_add (dst, ((ch >>  6) & 0x3f) | 0x80);
+      Dynarr_add (dst,  (ch        & 0x3f) | 0x80);
      }
-  else if ( code <= 0x3ffffff )
+  else if (ch <= 0x3ffffff)
      {
-      Dynarr_add (dst,  (code >> 24) | 0xf8);
-      Dynarr_add (dst, ((code >> 18) & 0x3f) | 0x80);
-      Dynarr_add (dst, ((code >> 12) & 0x3f) | 0x80);
-      Dynarr_add (dst, ((code >>  6) & 0x3f) | 0x80);
-      Dynarr_add (dst,  (code        & 0x3f) | 0x80);
+      Dynarr_add (dst,  (ch >> 24) | 0xf8);
+      Dynarr_add (dst, ((ch >> 18) & 0x3f) | 0x80);
+      Dynarr_add (dst, ((ch >> 12) & 0x3f) | 0x80);
+      Dynarr_add (dst, ((ch >>  6) & 0x3f) | 0x80);
+      Dynarr_add (dst,  (ch        & 0x3f) | 0x80);
      }
    else
      {
-      Dynarr_add (dst,  (code >> 30) | 0xfc);
-      Dynarr_add (dst, ((code >> 24) & 0x3f) | 0x80);
-      Dynarr_add (dst, ((code >> 18) & 0x3f) | 0x80);
-      Dynarr_add (dst, ((code >> 12) & 0x3f) | 0x80);
-      Dynarr_add (dst, ((code >>  6) & 0x3f) | 0x80);
-      Dynarr_add (dst,  (code        & 0x3f) | 0x80);
+      Dynarr_add (dst,  (ch >> 30) | 0xfc);
+      Dynarr_add (dst, ((ch >> 24) & 0x3f) | 0x80);
+      Dynarr_add (dst, ((ch >> 18) & 0x3f) | 0x80);
+      Dynarr_add (dst, ((ch >> 12) & 0x3f) | 0x80);
+      Dynarr_add (dst, ((ch >>  6) & 0x3f) | 0x80);
+      Dynarr_add (dst,  (ch        & 0x3f) | 0x80);
      }
  }
  
@@ -5144,9 +4801,23 @@ decode_coding_iso2022 (Lstream *decoding, const Extbyte *src,
                     charset = new_charset;
                 }
  
-#ifndef UTF2000
+#ifdef UTF2000
+             if (XCHARSET_DIMENSION (charset) == 1)
+               {
+                 DECODE_OUTPUT_PARTIAL_CHAR (ch);
+                 DECODE_ADD_UCS_CHAR
+                   (MAKE_CHAR (charset, c & 0x7F, 0), dst);
+               }
+             else if (ch)
+               {
+                 DECODE_ADD_UCS_CHAR
+                   (MAKE_CHAR (charset, ch & 0x7F, c & 0x7F), dst);
+                 ch = 0;
+               }
+             else
+               ch = c;
+#else
               lb = XCHARSET_LEADING_BYTE (charset);
-#endif
               switch (XCHARSET_REP_BYTES (charset))
                 {
                 case 1: /* ASCII */
@@ -5156,44 +4827,25 @@ decode_coding_iso2022 (Lstream *decoding, const Extbyte *src,
  
                 case 2: /* one-byte official */
                   DECODE_OUTPUT_PARTIAL_CHAR (ch);
-#ifdef UTF2000
-                 DECODE_ADD_UCS_CHAR(MAKE_CHAR(charset, c & 0x7F, 0), dst);
-#else
                   Dynarr_add (dst, lb);
                   Dynarr_add (dst, c | 0x80);
-#endif
                   break;
  
                 case 3: /* one-byte private or two-byte official */
-#ifdef UTF2000
-                 if (XCHARSET_DIMENSION (charset) == 1)
-#else
                   if (XCHARSET_PRIVATE_P (charset))
-#endif
                     {
                       DECODE_OUTPUT_PARTIAL_CHAR (ch);
-#ifdef UTF2000
-                     DECODE_ADD_UCS_CHAR(MAKE_CHAR(charset, c & 0x7F, 0),
-                                         dst);
-#else
                       Dynarr_add (dst, PRE_LEADING_BYTE_PRIVATE_1);
                       Dynarr_add (dst, lb);
                       Dynarr_add (dst, c | 0x80);
-#endif
                     }
                   else
                     {
                       if (ch)
                         {
-#ifdef UTF2000
-                         DECODE_ADD_UCS_CHAR(MAKE_CHAR(charset,
-                                                       ch & 0x7F,
-                                                       c & 0x7F), dst);
-#else
                           Dynarr_add (dst, lb);
                           Dynarr_add (dst, ch | 0x80);
                           Dynarr_add (dst, c | 0x80);
-#endif
                           ch = 0;
                         }
                       else
@@ -5204,21 +4856,16 @@ decode_coding_iso2022 (Lstream *decoding, const Extbyte *src,
                 default:        /* two-byte private */
                   if (ch)
                     {
-#ifdef UTF2000
-                     DECODE_ADD_UCS_CHAR(MAKE_CHAR(charset,
-                                                   ch & 0x7F,
-                                                   c & 0x7F), dst);
-#else
                       Dynarr_add (dst, PRE_LEADING_BYTE_PRIVATE_2);
                       Dynarr_add (dst, lb);
                       Dynarr_add (dst, ch | 0x80);
                       Dynarr_add (dst, c | 0x80);
-#endif
                       ch = 0;
                     }
                   else
                     ch = c;
                 }
+#endif
             }
  
           if (!ch)
@@ -5334,8 +4981,8 @@ char_encode_iso2022 (struct encoding_stream *str, Emchar ch,
    Lisp_Coding_System* codesys = str->codesys;
    eol_type_t eol_type         = CODING_SYSTEM_EOL_TYPE (str->codesys);
    int i;
-  Lisp_Object charset;
-  int half;
+  Lisp_Object charset = str->iso2022.current_charset;
+  int half = str->iso2022.current_half;
    unsigned int byte1, byte2;
  
    if (ch <= 0x7F)
@@ -5493,6 +5140,8 @@ char_encode_iso2022 (struct encoding_stream *str, Emchar ch,
           abort ();
         }
      }
+  str->iso2022.current_charset = charset;
+  str->iso2022.current_half = half;
  }
  
  void
@@ -5561,80 +5210,54 @@ encode_coding_no_conversion (Lstream *encoding, const Bufbyte *src,
      {
        c = *src++;        
  #ifdef UTF2000
-      switch (char_boundary)
+      if (char_boundary == 0)
+       if ( c >= 0xfc )
+         {
+           ch = c & 0x01;
+           char_boundary = 5;
+         }
+       else if ( c >= 0xf8 )
+         {
+           ch = c & 0x03;
+           char_boundary = 4;
+         }
+       else if ( c >= 0xf0 )
+         {
+           ch = c & 0x07;
+           char_boundary = 3;
+         }
+       else if ( c >= 0xe0 )
+         {
+           ch = c & 0x0f;
+           char_boundary = 2;
+         }
+       else if ( c >= 0xc0 )
+         {
+           ch = c & 0x1f;
+           char_boundary = 1;
+         }
+       else
+         {
+           ch = 0;
+           if (c == '\n')
+             {
+               if (eol_type != EOL_LF && eol_type != EOL_AUTODETECT)
+                 Dynarr_add (dst, '\r');
+               if (eol_type != EOL_CR)
+                 Dynarr_add (dst, c);
+             }
+           else
+             Dynarr_add (dst, c);
+           char_boundary = 0;
+         }
+      else if (char_boundary == 1)
         {
-       case 0:
-         if ( c >= 0xfc )
-           {
-             ch = c & 0x01;
-             char_boundary = 5;
-           }
-         else if ( c >= 0xf8 )
-           {
-             ch = c & 0x03;
-             char_boundary = 4;
-           }
-         else if ( c >= 0xf0 )
-           {
-             ch = c & 0x07;
-             char_boundary = 3;
-           }
-         else if ( c >= 0xe0 )
-           {
-             ch = c & 0x0f;
-             char_boundary = 2;
-           }
-         else if ( c >= 0xc0 )
-           {
-             ch = c & 0x1f;
-             char_boundary = 1;
-           }
-         else
-           {
-             ch = 0;
-
-             if (c == '\n')
-               {
-                 if (eol_type != EOL_LF && eol_type != EOL_AUTODETECT)
-                   Dynarr_add (dst, '\r');
-                 if (eol_type != EOL_CR)
-                   Dynarr_add (dst, c);
-               }
-             else
-               Dynarr_add (dst, c);
-             char_boundary = 0;
-           }
-         break;
-       case 1:
           ch = ( ch << 6 ) | ( c & 0x3f );
-         switch ( str->codesys->fixed.size )
-           {
-           case 1:
-             Dynarr_add (dst, ch & 0xff);
-             break;
-           case 2:
-             Dynarr_add (dst, (ch >> 8) & 0xff);
-             Dynarr_add (dst,  ch       & 0xff);
-             break;
-           case 3:
-             Dynarr_add (dst, (ch >> 16) & 0xff);
-             Dynarr_add (dst, (ch >>  8) & 0xff);
-             Dynarr_add (dst,  ch        & 0xff);
-             break;
-           case 4:
-             Dynarr_add (dst, (ch >> 24) & 0xff);
-             Dynarr_add (dst, (ch >> 16) & 0xff);
-             Dynarr_add (dst, (ch >>  8) & 0xff);
-             Dynarr_add (dst,  ch        & 0xff);
-             break;
-           default:
-             fprintf(stderr, "It seems %d bytes stream.\n",
-                     str->codesys->fixed.size);
-             abort ();
-           }
+         Dynarr_add (dst, ch & 0xff);
           char_boundary = 0;
-         break;
-       default:
+       }
+      else
+       {
           ch = ( ch << 6 ) | ( c & 0x3f );
           char_boundary--;
         }
@@ -5731,10 +5354,6 @@ syms_of_file_coding (void)
    DEFSUBR (Fencode_shift_jis_char);
    DEFSUBR (Fdecode_big5_char);
    DEFSUBR (Fencode_big5_char);
-  DEFSUBR (Fset_ucs_char);
-  DEFSUBR (Fucs_char);
-  DEFSUBR (Fset_char_ucs);
-  DEFSUBR (Fchar_ucs);
  #endif /* MULE */
    defsymbol (&Qcoding_systemp, "coding-system-p");
    defsymbol (&Qno_conversion, "no-conversion");
@@ -5971,15 +5590,4 @@ complex_vars_of_file_coding (void)
    fcd->coding_category_system[CODING_CATEGORY_UTF8]
      = Fget_coding_system (Qutf8);
  #endif
-
-#if defined(MULE) && !defined(UTF2000)
-  {
-    size_t i;
-
-    for (i = 0; i < countof (fcd->ucs_to_mule_table); i++)
-      fcd->ucs_to_mule_table[i] = Qnil;
-  }
-  staticpro (&mule_to_ucs_table);
-  mule_to_ucs_table = Fmake_char_table(Qgeneric);
-#endif /* defined(MULE) && !defined(UTF2000) */
  }
author	tomo <tomo>
	Mon, 16 Dec 2002 05:06:18 +0000 (05:06 +0000)
committer	tomo <tomo>
	Mon, 16 Dec 2002 05:06:18 +0000 (05:06 +0000)
lisp/ChangeLog		patch \| blob \| history
lisp/custom-load.el		patch \| blob \| history
lisp/mule/mule-charset.el		patch \| blob \| history
src/ChangeLog		patch \| blob \| history
src/char-lb.h		patch \| blob \| history
src/char-ucs.h		patch \| blob \| history
src/depend		patch \| blob \| history
src/file-coding.c		patch \| blob \| history
src/file-coding.h		patch \| blob \| history
src/indent.c		patch \| blob \| history
src/insdel.c		patch \| blob \| history
src/mule-charset.c		patch \| blob \| history
src/objects-tty.c		patch \| blob \| history
src/text-coding.c		patch \| blob \| history