(char_encode_shift_jis): Refer

[chise/xemacs-chise.git-] / src / file-coding.c
diff --git a/src/file-coding.c b/src/file-coding.c

index 48363a4..3068c89 100644 (file)
--- a/src/file-coding.c
+++ b/src/file-coding.c
@@ -25,12 +25,14 @@ Boston, MA 02111-1307, USA.  */
  
  #include <config.h>
  #include "lisp.h"
  
  #include <config.h>
  #include "lisp.h"
+
  #include "buffer.h"
  #include "elhash.h"
  #include "insdel.h"
  #include "lstream.h"
  #ifdef MULE
  #include "mule-ccl.h"
  #include "buffer.h"
  #include "elhash.h"
  #include "insdel.h"
  #include "lstream.h"
  #ifdef MULE
  #include "mule-ccl.h"
+#include "chartab.h"
  #endif
  #include "file-coding.h"
  
  #endif
  #include "file-coding.h"
  
@@ -54,7 +56,7 @@ int coding_category_by_priority[CODING_CATEGORY_LAST + 1];
  
  Lisp_Object Qcoding_system_p;
  
  
  Lisp_Object Qcoding_system_p;
  
-Lisp_Object Qno_conversion, Qccl, Qiso2022;
+Lisp_Object Qraw_text, Qno_conversion, Qccl, Qiso2022;
  /* Qinternal in general.c */
  
  Lisp_Object Qmnemonic, Qeol_type;
  /* Qinternal in general.c */
  
  Lisp_Object Qmnemonic, Qeol_type;
@@ -64,6 +66,7 @@ Lisp_Object Qpost_read_conversion;
  Lisp_Object Qpre_write_conversion;
  
  #ifdef MULE
  Lisp_Object Qpre_write_conversion;
  
  #ifdef MULE
+Lisp_Object Qucs4, Qutf8;
  Lisp_Object Qbig5, Qshift_jis;
  Lisp_Object Qcharset_g0, Qcharset_g1, Qcharset_g2, Qcharset_g3;
  Lisp_Object Qforce_g0_on_output, Qforce_g1_on_output;
  Lisp_Object Qbig5, Qshift_jis;
  Lisp_Object Qcharset_g0, Qcharset_g1, Qcharset_g2, Qcharset_g3;
  Lisp_Object Qforce_g0_on_output, Qforce_g1_on_output;
@@ -75,7 +78,7 @@ Lisp_Object Qshort, Qno_ascii_eol, Qno_ascii_cntl, Qseven, Qlock_shift;
  #endif
  Lisp_Object Qencode, Qdecode;
  
  #endif
  Lisp_Object Qencode, Qdecode;
  
-Lisp_Object Vcoding_system_hashtable;
+Lisp_Object Vcoding_system_hash_table;
  
  int enable_multibyte_characters;
  
  
  int enable_multibyte_characters;
  
@@ -103,8 +106,10 @@ struct iso2022_decoder
    /* Index for next byte to store in ISO escape sequence. */
    int esc_bytes_index;
  
    /* Index for next byte to store in ISO escape sequence. */
    int esc_bytes_index;
  
+#ifdef ENABLE_COMPOSITE_CHARS
    /* Stuff seen so far when composing a string. */
    unsigned_char_dynarr *composite_chars;
    /* Stuff seen so far when composing a string. */
    unsigned_char_dynarr *composite_chars;
+#endif
  
    /* If we saw an invalid designation sequence for a particular
       register, we flag it here and switch to ASCII.  The next time we
  
    /* If we saw an invalid designation sequence for a particular
       register, we flag it here and switch to ASCII.  The next time we
@@ -166,6 +171,24 @@ static void decode_coding_big5 (Lstream *decoding,
  static void encode_coding_big5 (Lstream *encoding,
                                 CONST unsigned char *src,
                                 unsigned_char_dynarr *dst, unsigned int n);
  static void encode_coding_big5 (Lstream *encoding,
                                 CONST unsigned char *src,
                                 unsigned_char_dynarr *dst, unsigned int n);
+static int detect_coding_ucs4 (struct detection_state *st,
+                              CONST unsigned char *src,
+                              unsigned int n);
+static void decode_coding_ucs4 (Lstream *decoding,
+                               CONST unsigned char *src,
+                               unsigned_char_dynarr *dst, unsigned int n);
+static void encode_coding_ucs4 (Lstream *encoding,
+                               CONST unsigned char *src,
+                               unsigned_char_dynarr *dst, unsigned int n);
+static int detect_coding_utf8 (struct detection_state *st,
+                              CONST unsigned char *src,
+                              unsigned int n);
+static void decode_coding_utf8 (Lstream *decoding,
+                               CONST unsigned char *src,
+                               unsigned_char_dynarr *dst, unsigned int n);
+static void encode_coding_utf8 (Lstream *encoding,
+                               CONST unsigned char *src,
+                               unsigned_char_dynarr *dst, unsigned int n);
  static int postprocess_iso2022_mask (int mask);
  static void reset_iso2022 (Lisp_Object coding_system,
                            struct iso2022_decoder *iso);
  static int postprocess_iso2022_mask (int mask);
  static void reset_iso2022 (Lisp_Object coding_system,
                            struct iso2022_decoder *iso);
@@ -222,22 +245,58 @@ static Lisp_Object mark_coding_system (Lisp_Object, void (*) (Lisp_Object));
  static void print_coding_system (Lisp_Object, Lisp_Object, int);
  static void finalize_coding_system (void *header, int for_disksave);
  
  static void print_coding_system (Lisp_Object, Lisp_Object, int);
  static void finalize_coding_system (void *header, int for_disksave);
  
+#ifdef MULE
+static const struct lrecord_description ccs_description_1[] = {
+  { XD_LISP_OBJECT, offsetof(charset_conversion_spec, from_charset), 2 }, /* NOTE(review):
+     the 2 is presumably a count of adjacent Lisp_Object slots, i.e.
+     from_charset and to_charset (the two fields mark_coding_system marks
+     for each element) -- confirm against the lrecord_description
+     definition. */
+  { XD_END }
+};
+/* Element description: the size of one charset_conversion_spec paired
+   with the field list above. */
+static const struct struct_description ccs_description = {
+  sizeof(charset_conversion_spec),
+  ccs_description_1
+};
+/* Field list for the dynarr itself; XD_DYNARR_DESC is handed the dynarr
+   type and the description of its elements. */
+static const struct lrecord_description ccsd_description_1[] = {
+  XD_DYNARR_DESC(charset_conversion_spec_dynarr, &ccs_description),
+  { XD_END }
+};
+/* Pairs sizeof (charset_conversion_spec_dynarr) with its field list; this
+   is what the iso2022.input_conv / iso2022.output_conv entries of
+   coding_system_description point at. */
+static const struct struct_description ccsd_description = {
+  sizeof(charset_conversion_spec_dynarr),
+  ccsd_description_1
+};
+#endif
+/* Field description table for struct Lisp_Coding_System, terminated by
+   XD_END and handed to DEFINE_LRECORD_IMPLEMENTATION for "coding-system".
+   NOTE(review): the number after each offsetof in an XD_LISP_OBJECT entry
+   is presumably a count of consecutive Lisp_Object slots starting at the
+   named field (e.g. 4 for the four iso2022 initial charsets, 2 for ccl
+   decode + encode, matching what mark_coding_system marks) -- confirm
+   against the struct layout in file-coding.h. */
+static const struct lrecord_description coding_system_description[] = {
+  { XD_LISP_OBJECT, offsetof(struct Lisp_Coding_System, name), 2 },
+  { XD_LISP_OBJECT, offsetof(struct Lisp_Coding_System, mnemonic), 3 },
+  { XD_LISP_OBJECT, offsetof(struct Lisp_Coding_System, eol_lf), 3 },
+#ifdef MULE
+  { XD_LISP_OBJECT, offsetof(struct Lisp_Coding_System, iso2022.initial_charset), 4 },
+  { XD_STRUCT_PTR,  offsetof(struct Lisp_Coding_System, iso2022.input_conv),  1, &ccsd_description },
+  { XD_STRUCT_PTR,  offsetof(struct Lisp_Coding_System, iso2022.output_conv), 1, &ccsd_description },
+  { XD_LISP_OBJECT, offsetof(struct Lisp_Coding_System, ccl.decode), 2 },
+#endif
+  { XD_END }
+};
+
  DEFINE_LRECORD_IMPLEMENTATION ("coding-system", coding_system,
                                mark_coding_system, print_coding_system,
                                finalize_coding_system,
  DEFINE_LRECORD_IMPLEMENTATION ("coding-system", coding_system,
                                mark_coding_system, print_coding_system,
                                finalize_coding_system,
-                              0, 0, struct Lisp_Coding_System);
+                              0, 0, coding_system_description,
+                              struct Lisp_Coding_System);
  
  static Lisp_Object
  mark_coding_system (Lisp_Object obj, void (*markobj) (Lisp_Object))
  {
  
  static Lisp_Object
  mark_coding_system (Lisp_Object obj, void (*markobj) (Lisp_Object))
  {
-  struct Lisp_Coding_System *codesys = XCODING_SYSTEM (obj);
+  Lisp_Coding_System *codesys = XCODING_SYSTEM (obj);
  
  
-  (markobj) (CODING_SYSTEM_NAME (codesys));
-  (markobj) (CODING_SYSTEM_DOC_STRING (codesys));
-  (markobj) (CODING_SYSTEM_MNEMONIC (codesys));
-  (markobj) (CODING_SYSTEM_EOL_LF (codesys));
-  (markobj) (CODING_SYSTEM_EOL_CRLF (codesys));
-  (markobj) (CODING_SYSTEM_EOL_CR (codesys));
+  markobj (CODING_SYSTEM_NAME (codesys));
+  markobj (CODING_SYSTEM_DOC_STRING (codesys));
+  markobj (CODING_SYSTEM_MNEMONIC (codesys));
+  markobj (CODING_SYSTEM_EOL_LF (codesys));
+  markobj (CODING_SYSTEM_EOL_CRLF (codesys));
+  markobj (CODING_SYSTEM_EOL_CR (codesys));
  
    switch (CODING_SYSTEM_TYPE (codesys))
      {
  
    switch (CODING_SYSTEM_TYPE (codesys))
      {
@@ -245,15 +304,15 @@ mark_coding_system (Lisp_Object obj, void (*markobj) (Lisp_Object))
        int i;
      case CODESYS_ISO2022:
        for (i = 0; i < 4; i++)
        int i;
      case CODESYS_ISO2022:
        for (i = 0; i < 4; i++)
-       (markobj) (CODING_SYSTEM_ISO2022_INITIAL_CHARSET (codesys, i));
+       markobj (CODING_SYSTEM_ISO2022_INITIAL_CHARSET (codesys, i));
        if (codesys->iso2022.input_conv)
         {
           for (i = 0; i < Dynarr_length (codesys->iso2022.input_conv); i++)
             {
               struct charset_conversion_spec *ccs =
                 Dynarr_atp (codesys->iso2022.input_conv, i);
        if (codesys->iso2022.input_conv)
         {
           for (i = 0; i < Dynarr_length (codesys->iso2022.input_conv); i++)
             {
               struct charset_conversion_spec *ccs =
                 Dynarr_atp (codesys->iso2022.input_conv, i);
-             (markobj) (ccs->from_charset);
-             (markobj) (ccs->to_charset);
+             markobj (ccs->from_charset);
+             markobj (ccs->to_charset);
             }
         }
        if (codesys->iso2022.output_conv)
             }
         }
        if (codesys->iso2022.output_conv)
@@ -262,22 +321,22 @@ mark_coding_system (Lisp_Object obj, void (*markobj) (Lisp_Object))
             {
               struct charset_conversion_spec *ccs =
                 Dynarr_atp (codesys->iso2022.output_conv, i);
             {
               struct charset_conversion_spec *ccs =
                 Dynarr_atp (codesys->iso2022.output_conv, i);
-             (markobj) (ccs->from_charset);
-             (markobj) (ccs->to_charset);
+             markobj (ccs->from_charset);
+             markobj (ccs->to_charset);
             }
         }
        break;
  
      case CODESYS_CCL:
             }
         }
        break;
  
      case CODESYS_CCL:
-      (markobj) (CODING_SYSTEM_CCL_DECODE (codesys));
-      (markobj) (CODING_SYSTEM_CCL_ENCODE (codesys));
+      markobj (CODING_SYSTEM_CCL_DECODE (codesys));
+      markobj (CODING_SYSTEM_CCL_ENCODE (codesys));
        break;
  #endif /* MULE */
      default:
        break;
      }
  
        break;
  #endif /* MULE */
      default:
        break;
      }
  
-  (markobj) (CODING_SYSTEM_PRE_WRITE_CONVERSION (codesys));
+  markobj (CODING_SYSTEM_PRE_WRITE_CONVERSION (codesys));
    return CODING_SYSTEM_POST_READ_CONVERSION (codesys);
  }
  
    return CODING_SYSTEM_POST_READ_CONVERSION (codesys);
  }
  
@@ -285,7 +344,7 @@ static void
  print_coding_system (Lisp_Object obj, Lisp_Object printcharfun,
                      int escapeflag)
  {
  print_coding_system (Lisp_Object obj, Lisp_Object printcharfun,
                      int escapeflag)
  {
-  struct Lisp_Coding_System *c = XCODING_SYSTEM (obj);
+  Lisp_Coding_System *c = XCODING_SYSTEM (obj);
    if (print_readably)
      error ("printing unreadable object #<coding_system 0x%x>",
            c->header.uid);
    if (print_readably)
      error ("printing unreadable object #<coding_system 0x%x>",
            c->header.uid);
@@ -298,7 +357,7 @@ print_coding_system (Lisp_Object obj, Lisp_Object printcharfun,
  static void
  finalize_coding_system (void *header, int for_disksave)
  {
  static void
  finalize_coding_system (void *header, int for_disksave)
  {
-  struct Lisp_Coding_System *c = (struct Lisp_Coding_System *) header;
+  Lisp_Coding_System *c = (Lisp_Coding_System *) header;
    /* Since coding systems never go away, this function is not
       necessary.  But it would be necessary if we changed things
       so that coding systems could go away. */
    /* Since coding systems never go away, this function is not
       necessary.  But it would be necessary if we changed things
       so that coding systems could go away. */
@@ -344,16 +403,16 @@ eol_type_to_symbol (enum eol_type type)
  {
    switch (type)
      {
  {
    switch (type)
      {
+    default: abort ();
      case EOL_LF:         return Qlf;
      case EOL_CRLF:       return Qcrlf;
      case EOL_CR:         return Qcr;
      case EOL_AUTODETECT: return Qnil;
      case EOL_LF:         return Qlf;
      case EOL_CRLF:       return Qcrlf;
      case EOL_CR:         return Qcr;
      case EOL_AUTODETECT: return Qnil;
-    default:             abort (); return Qnil; /* not reached */
      }
  }
  
  static void
      }
  }
  
  static void
-setup_eol_coding_systems (struct Lisp_Coding_System *codesys)
+setup_eol_coding_systems (Lisp_Coding_System *codesys)
  {
    Lisp_Object codesys_obj;
    int len = string_length (XSYMBOL (CODING_SYSTEM_NAME (codesys))->name);
  {
    Lisp_Object codesys_obj;
    int len = string_length (XSYMBOL (CODING_SYSTEM_NAME (codesys))->name);
@@ -439,7 +498,7 @@ associated coding system object is returned.
    else
      CHECK_SYMBOL (coding_system_or_name);
  
    else
      CHECK_SYMBOL (coding_system_or_name);
  
-  return Fgethash (coding_system_or_name, Vcoding_system_hashtable, Qnil);
+  return Fgethash (coding_system_or_name, Vcoding_system_hash_table, Qnil);
  }
  
  DEFUN ("get-coding-system", Fget_coding_system, 1, 1, 0, /*
  }
  
  DEFUN ("get-coding-system", Fget_coding_system, 1, 1, 0, /*
@@ -465,19 +524,15 @@ struct coding_system_list_closure
  };
  
  static int
  };
  
  static int
-add_coding_system_to_list_mapper (CONST void *hash_key, void *hash_contents,
+add_coding_system_to_list_mapper (Lisp_Object key, Lisp_Object value,
                                   void *coding_system_list_closure)
  {
    /* This function can GC */
                                   void *coding_system_list_closure)
  {
    /* This function can GC */
-  Lisp_Object key, contents;
-  Lisp_Object *coding_system_list;
    struct coding_system_list_closure *cscl =
      (struct coding_system_list_closure *) coding_system_list_closure;
    struct coding_system_list_closure *cscl =
      (struct coding_system_list_closure *) coding_system_list_closure;
-  CVOID_TO_LISP (key, hash_key);
-  VOID_TO_LISP (contents, hash_contents);
-  coding_system_list = cscl->coding_system_list;
+  Lisp_Object *coding_system_list = cscl->coding_system_list;
  
  
-  *coding_system_list = Fcons (XCODING_SYSTEM (contents)->name,
+  *coding_system_list = Fcons (XCODING_SYSTEM (value)->name,
                                *coding_system_list);
    return 0;
  }
                                *coding_system_list);
    return 0;
  }
@@ -493,7 +548,7 @@ Return a list of the names of all defined coding systems.
  
    GCPRO1 (coding_system_list);
    coding_system_list_closure.coding_system_list = &coding_system_list;
  
    GCPRO1 (coding_system_list);
    coding_system_list_closure.coding_system_list = &coding_system_list;
-  elisp_maphash (add_coding_system_to_list_mapper, Vcoding_system_hashtable,
+  elisp_maphash (add_coding_system_to_list_mapper, Vcoding_system_hash_table,
                  &coding_system_list_closure);
    UNGCPRO;
  
                  &coding_system_list_closure);
    UNGCPRO;
  
@@ -509,11 +564,11 @@ Return the name of the given coding system.
    return XCODING_SYSTEM_NAME (coding_system);
  }
  
    return XCODING_SYSTEM_NAME (coding_system);
  }
  
-static struct Lisp_Coding_System *
+static Lisp_Coding_System *
  allocate_coding_system (enum coding_system_type type, Lisp_Object name)
  {
  allocate_coding_system (enum coding_system_type type, Lisp_Object name)
  {
-  struct Lisp_Coding_System *codesys =
-    alloc_lcrecord_type (struct Lisp_Coding_System, lrecord_coding_system);
+  Lisp_Coding_System *codesys =
+    alloc_lcrecord_type (Lisp_Coding_System, &lrecord_coding_system);
  
    zero_lcrecord (codesys);
    CODING_SYSTEM_PRE_WRITE_CONVERSION (codesys) = Qnil;
  
    zero_lcrecord (codesys);
    CODING_SYSTEM_PRE_WRITE_CONVERSION (codesys) = Qnil;
@@ -612,6 +667,10 @@ nil or 'undecided
       characters will only be present if you explicitly insert them.)
  'shift-jis
       Shift-JIS (a Japanese encoding commonly used in PC operating systems).
       characters will only be present if you explicitly insert them.)
  'shift-jis
       Shift-JIS (a Japanese encoding commonly used in PC operating systems).
+'ucs-4
+     ISO 10646 UCS-4 encoding.
+'utf-8
+     ISO 10646 UTF-8 encoding.
  'iso2022
       Any ISO2022-compliant encoding.  Among other things, this includes
       JIS (the Japanese encoding commonly used for e-mail), EUC (the
  'iso2022
       Any ISO2022-compliant encoding.  Among other things, this includes
       JIS (the Japanese encoding commonly used for e-mail), EUC (the
@@ -766,7 +825,7 @@ if TYPE is 'ccl:
  */
         (name, type, doc_string, props))
  {
  */
         (name, type, doc_string, props))
  {
-  struct Lisp_Coding_System *codesys;
+  Lisp_Coding_System *codesys;
    Lisp_Object rest, key, value;
    enum coding_system_type ty;
    int need_to_setup_eol_systems = 1;
    Lisp_Object rest, key, value;
    enum coding_system_type ty;
    int need_to_setup_eol_systems = 1;
@@ -778,6 +837,8 @@ if TYPE is 'ccl:
    else if (EQ (type, Qshift_jis))     { ty = CODESYS_SHIFT_JIS; }
    else if (EQ (type, Qiso2022))       { ty = CODESYS_ISO2022; }
    else if (EQ (type, Qbig5))          { ty = CODESYS_BIG5; }
    else if (EQ (type, Qshift_jis))     { ty = CODESYS_SHIFT_JIS; }
    else if (EQ (type, Qiso2022))       { ty = CODESYS_ISO2022; }
    else if (EQ (type, Qbig5))          { ty = CODESYS_BIG5; }
+  else if (EQ (type, Qucs4))          { ty = CODESYS_UCS4; }
+  else if (EQ (type, Qutf8))          { ty = CODESYS_UTF8; }
    else if (EQ (type, Qccl))           { ty = CODESYS_CCL; }
  #endif
    else if (EQ (type, Qno_conversion)) { ty = CODESYS_NO_CONVERSION; }
    else if (EQ (type, Qccl))           { ty = CODESYS_CCL; }
  #endif
    else if (EQ (type, Qno_conversion)) { ty = CODESYS_NO_CONVERSION; }
@@ -890,7 +951,7 @@ if TYPE is 'ccl:
    {
      Lisp_Object codesys_obj;
      XSETCODING_SYSTEM (codesys_obj, codesys);
    {
      Lisp_Object codesys_obj;
      XSETCODING_SYSTEM (codesys_obj, codesys);
-    Fputhash (name, codesys_obj, Vcoding_system_hashtable);
+    Fputhash (name, codesys_obj, Vcoding_system_hash_table);
      return codesys_obj;
    }
  }
      return codesys_obj;
    }
  }
@@ -911,12 +972,12 @@ be created.
                          allocate_coding_system
                          (XCODING_SYSTEM_TYPE (old_coding_system),
                           new_name));
                          allocate_coding_system
                          (XCODING_SYSTEM_TYPE (old_coding_system),
                           new_name));
-      Fputhash (new_name, new_coding_system, Vcoding_system_hashtable);
+      Fputhash (new_name, new_coding_system, Vcoding_system_hash_table);
      }
  
    {
      }
  
    {
-    struct Lisp_Coding_System *to = XCODING_SYSTEM (new_coding_system);
-    struct Lisp_Coding_System *from = XCODING_SYSTEM (old_coding_system);
+    Lisp_Coding_System *to = XCODING_SYSTEM (new_coding_system);
+    Lisp_Coding_System *from = XCODING_SYSTEM (old_coding_system);
      memcpy (((char *) to  ) + sizeof (to->header),
             ((char *) from) + sizeof (from->header),
             sizeof (*from) - sizeof (from->header));
      memcpy (((char *) to  ) + sizeof (to->header),
             ((char *) from) + sizeof (from->header),
             sizeof (*from) - sizeof (from->header));
@@ -925,10 +986,44 @@ be created.
    return new_coding_system;
  }
  
    return new_coding_system;
  }
  
+DEFUN ("define-coding-system-alias", Fdefine_coding_system_alias, 2, 2, 0, /*
+Define symbol ALIAS as an alias for coding system CODING-SYSTEM.
+Signal an error if ALIAS already names a coding system.
+If CODING-SYSTEM auto-detects its EOL type, ALIAS-unix, ALIAS-dos and
+ALIAS-mac are also defined as aliases for its non-nil EOL subsidiaries.
+Return nil.
+*/
+       (alias, coding_system))
+{
+  CHECK_SYMBOL (alias);
+  if (!NILP (Ffind_coding_system (alias))) /* never overwrite an existing entry */
+    signal_simple_error ("Symbol already names a coding system", alias);
+  coding_system = Fget_coding_system (coding_system); /* normalize the argument before storing it */
+  Fputhash (alias, coding_system, Vcoding_system_hash_table);
+
+  /* Set up aliases for subsidiaries.  FROB appends the given suffix to
+     ALIAS's name, interns the result, and recursively aliases that symbol
+     to the matching EOL subsidiary; nil subsidiaries are skipped.  The
+     recursive calls go through the same "already names a coding system"
+     check as the top-level call. */
+  if (XCODING_SYSTEM_EOL_TYPE (coding_system) == EOL_AUTODETECT)
+    {
+      Lisp_Object str;
+      XSETSTRING (str, symbol_name (XSYMBOL (alias)));
+#define FROB(type, name)                                                       \
+      do {                                                                     \
+       Lisp_Object subsidiary = XCODING_SYSTEM_EOL_##type (coding_system);     \
+       if (!NILP (subsidiary))                                                 \
+         Fdefine_coding_system_alias                                           \
+           (Fintern (concat2 (str, build_string (name)), Qnil), subsidiary);   \
+      } while (0)
+      FROB (LF,   "-unix");
+      FROB (CRLF, "-dos");
+      FROB (CR,   "-mac");
+#undef FROB
+    }
+  /* FSF return value is a vector of [ALIAS-unix ALIAS-dos ALIAS-mac],
+     but it doesn't look intentional, so I'd rather return something
+     meaningful or nothing at all. */
+  return Qnil;
+}
+
  static Lisp_Object
  subsidiary_coding_system (Lisp_Object coding_system, enum eol_type type)
  {
  static Lisp_Object
  subsidiary_coding_system (Lisp_Object coding_system, enum eol_type type)
  {
-  struct Lisp_Coding_System *cs = XCODING_SYSTEM (coding_system);
+  Lisp_Coding_System *cs = XCODING_SYSTEM (coding_system);
    Lisp_Object new_coding_system;
  
    if (CODING_SYSTEM_EOL_TYPE (cs) != EOL_AUTODETECT)
    Lisp_Object new_coding_system;
  
    if (CODING_SYSTEM_EOL_TYPE (cs) != EOL_AUTODETECT)
@@ -978,22 +1073,21 @@ Return the type of CODING-SYSTEM.
  {
    switch (XCODING_SYSTEM_TYPE (Fget_coding_system (coding_system)))
      {
  {
    switch (XCODING_SYSTEM_TYPE (Fget_coding_system (coding_system)))
      {
+    default: abort ();
      case CODESYS_AUTODETECT:   return Qundecided;
  #ifdef MULE
      case CODESYS_SHIFT_JIS:    return Qshift_jis;
      case CODESYS_ISO2022:      return Qiso2022;
      case CODESYS_BIG5:         return Qbig5;
      case CODESYS_AUTODETECT:   return Qundecided;
  #ifdef MULE
      case CODESYS_SHIFT_JIS:    return Qshift_jis;
      case CODESYS_ISO2022:      return Qiso2022;
      case CODESYS_BIG5:         return Qbig5;
+    case CODESYS_UCS4:         return Qucs4;
+    case CODESYS_UTF8:         return Qutf8;
      case CODESYS_CCL:          return Qccl;
  #endif
      case CODESYS_NO_CONVERSION:        return Qno_conversion;
  #ifdef DEBUG_XEMACS
      case CODESYS_INTERNAL:     return Qinternal;
  #endif
      case CODESYS_CCL:          return Qccl;
  #endif
      case CODESYS_NO_CONVERSION:        return Qno_conversion;
  #ifdef DEBUG_XEMACS
      case CODESYS_INTERNAL:     return Qinternal;
  #endif
-    default:
-      abort ();
      }
      }
-
-  return Qnil; /* not reached */
  }
  
  #ifdef MULE
  }
  
  #ifdef MULE
@@ -1289,6 +1383,20 @@ struct detection_state
    struct
      {
        int mask;
    struct
      {
        int mask;
+      int in_byte;
+  }
+  ucs4;
+
+  struct
+    {
+      int mask;
+      int in_byte;
+    }
+  utf8;
+
+  struct
+    {
+      int mask;
        int initted;
        struct iso2022_decoder iso;
        unsigned int flags;
        int initted;
        struct iso2022_decoder iso;
        unsigned int flags;
@@ -1405,6 +1513,8 @@ detect_coding_type (struct detection_state *st, CONST unsigned char *src,
  #ifdef MULE
               st->shift_jis.mask = ~0;
               st->big5.mask = ~0;
  #ifdef MULE
               st->shift_jis.mask = ~0;
               st->big5.mask = ~0;
+             st->ucs4.mask = ~0;
+             st->utf8.mask = ~0;
               st->iso2022.mask = ~0;
  #endif
               break;
               st->iso2022.mask = ~0;
  #endif
               break;
@@ -1421,8 +1531,14 @@ detect_coding_type (struct detection_state *st, CONST unsigned char *src,
      st->shift_jis.mask = detect_coding_sjis (st, src, n);
    if (!mask_has_at_most_one_bit_p (st->big5.mask))
      st->big5.mask = detect_coding_big5 (st, src, n);
      st->shift_jis.mask = detect_coding_sjis (st, src, n);
    if (!mask_has_at_most_one_bit_p (st->big5.mask))
      st->big5.mask = detect_coding_big5 (st, src, n);
-
-  st->mask = st->iso2022.mask | st->shift_jis.mask | st->big5.mask;
+  if (!mask_has_at_most_one_bit_p (st->utf8.mask))
+    st->utf8.mask = detect_coding_utf8 (st, src, n);
+  if (!mask_has_at_most_one_bit_p (st->ucs4.mask))
+    st->ucs4.mask = detect_coding_ucs4 (st, src, n);
+
+  st->mask
+    = st->iso2022.mask | st->shift_jis.mask | st->big5.mask
+    | st->utf8.mask | st->ucs4.mask;
  #endif
    {
      int retval = mask_has_at_most_one_bit_p (st->mask);
  #endif
    {
      int retval = mask_has_at_most_one_bit_p (st->mask);
@@ -1452,7 +1568,7 @@ coding_system_from_mask (int mask)
             }
         }
        if (NILP (retval))
             }
         }
        if (NILP (retval))
-       retval = Fget_coding_system (Qno_conversion);
+       retval = Fget_coding_system (Qraw_text);
        return retval;
      }
    else
        return retval;
      }
    else
@@ -1474,7 +1590,7 @@ coding_system_from_mask (int mask)
        if (cat >= 0)
         return coding_category_system[cat];
        else
        if (cat >= 0)
         return coding_category_system[cat];
        else
-       return Fget_coding_system (Qno_conversion);
+       return Fget_coding_system (Qraw_text);
      }
  }
  
      }
  }
  
@@ -1504,26 +1620,65 @@ determine_real_coding_system (Lstream *stream, Lisp_Object *codesys_in_out,
    if (XCODING_SYSTEM_TYPE (*codesys_in_out) == CODESYS_AUTODETECT ||
        *eol_type_in_out == EOL_AUTODETECT)
      {
    if (XCODING_SYSTEM_TYPE (*codesys_in_out) == CODESYS_AUTODETECT ||
        *eol_type_in_out == EOL_AUTODETECT)
      {
+      unsigned char random_buffer[4096];
+      int nread;
+      Lisp_Object coding_system = Qnil;
  
  
-      while (1)
+      nread = Lstream_read (stream, random_buffer, sizeof (random_buffer));
+      if (nread)
         {
         {
-         unsigned char random_buffer[4096];
-         int nread;
+         unsigned char *cp = random_buffer;
  
  
-         nread = Lstream_read (stream, random_buffer, sizeof (random_buffer));
-         if (!nread)
-           break;
-         if (detect_coding_type (&decst, random_buffer, nread,
-                                 XCODING_SYSTEM_TYPE (*codesys_in_out) !=
-                                 CODESYS_AUTODETECT))
-           break;
-       }
+         while (cp < random_buffer + nread)
+           {
+             if ((*cp++ == 'c') && (cp < random_buffer + nread) &&
+                 (*cp++ == 'o') && (cp < random_buffer + nread) &&
+                 (*cp++ == 'd') && (cp < random_buffer + nread) &&
+                 (*cp++ == 'i') && (cp < random_buffer + nread) &&
+                 (*cp++ == 'n') && (cp < random_buffer + nread) &&
+                 (*cp++ == 'g') && (cp < random_buffer + nread) &&
+                 (*cp++ == ':') && (cp < random_buffer + nread))
+               {
+                 unsigned char coding_system_name[4096 - 6];
+                 unsigned char *np = coding_system_name;
  
  
+                 while ( (cp < random_buffer + nread)
+                         && ((*cp == ' ') || (*cp == '\t')) )
+                   {
+                     cp++;
+                   }
+                 while ( (cp < random_buffer + nread) &&
+                         (*cp != ' ') && (*cp != '\t') && (*cp != ';') )
+                   {
+                     *np++ = *cp++;
+                   }
+                 *np = 0;
+                 coding_system
+                   = Ffind_coding_system (intern (coding_system_name));
+                 break;
+               }
+           }
+         if (EQ(coding_system, Qnil))
+           do{
+             if (detect_coding_type (&decst, random_buffer, nread,
+                                     XCODING_SYSTEM_TYPE (*codesys_in_out)
+                                     != CODESYS_AUTODETECT))
+               break;
+             nread = Lstream_read (stream,
+                                   random_buffer, sizeof (random_buffer));
+             if (!nread)
+               break;
+           } while(1);
+       }
        *eol_type_in_out = decst.eol_type;
        if (XCODING_SYSTEM_TYPE (*codesys_in_out) == CODESYS_AUTODETECT)
        *eol_type_in_out = decst.eol_type;
        if (XCODING_SYSTEM_TYPE (*codesys_in_out) == CODESYS_AUTODETECT)
-       *codesys_in_out = coding_system_from_mask (decst.mask);
+       {
+         if (EQ(coding_system, Qnil))
+           *codesys_in_out = coding_system_from_mask (decst.mask);
+         else
+           *codesys_in_out = coding_system;
+       }
      }
      }
-
    /* If we absolutely can't determine the EOL type, just assume LF. */
    if (*eol_type_in_out == EOL_AUTODETECT)
      *eol_type_in_out = EOL_LF;
    /* If we absolutely can't determine the EOL type, just assume LF. */
    if (*eol_type_in_out == EOL_AUTODETECT)
      *eol_type_in_out = EOL_LF;
@@ -1646,6 +1801,62 @@ do {                                                             \
  /* C should be a binary character in the range 0 - 255; convert
     to internal format and add to Dynarr DST. */
  
  /* C should be a binary character in the range 0 - 255; convert
     to internal format and add to Dynarr DST. */
  
+#ifdef UTF2000
+#define DECODE_ADD_BINARY_CHAR(c, dst) \
+do {                                           \
+  if (BYTE_ASCII_P (c))                                \
+    Dynarr_add (dst, c);                       \
+  else                                         \
+    {                                          \
+      Dynarr_add (dst, (c >> 6) | 0xc0);       /* lead byte: 0xc0 | top bits of c */ \
+      Dynarr_add (dst, (c & 0x3f) | 0x80);     /* trail byte: 0x80 | low 6 bits */ \
+    }                                          \
+} while (0)
+/* Append the character code C to DST as a sequence of one to six bytes,
+   chosen by the magnitude of C.  A value up to 0x7f is stored as a single
+   byte; larger values get a lead byte (0xc0, 0xe0, 0xf0, 0xf8 or 0xfc ORed
+   with the top bits) followed by trail bytes that each carry six bits
+   ORed with 0x80.  The thresholds and prefixes match the UTF-8 bit layout
+   as originally defined for sequences of up to six bytes.
+   NOTE(review): no range check is made; if Emchar is a signed type, a
+   negative C satisfies the first test and is added as a single byte --
+   confirm callers never pass one. */
+INLINE void
+DECODE_ADD_UCS_CHAR(Emchar c, unsigned_char_dynarr* dst)
+{
+  if ( c <= 0x7f ) /* one byte, stored as-is */
+    {
+      Dynarr_add (dst, c);
+    }
+  else if ( c <= 0x7ff ) /* two bytes: 0xc0 lead + 1 trail */
+    {
+      Dynarr_add (dst, (c >> 6) | 0xc0);
+      Dynarr_add (dst, (c & 0x3f) | 0x80);
+    }
+  else if ( c <= 0xffff ) /* three bytes: 0xe0 lead + 2 trail */
+    {
+      Dynarr_add (dst,  (c >> 12) | 0xe0);
+      Dynarr_add (dst, ((c >>  6) & 0x3f) | 0x80);
+      Dynarr_add (dst,  (c        & 0x3f) | 0x80);
+    }
+  else if ( c <= 0x1fffff ) /* four bytes: 0xf0 lead + 3 trail */
+    {
+      Dynarr_add (dst,  (c >> 18) | 0xf0);
+      Dynarr_add (dst, ((c >> 12) & 0x3f) | 0x80);
+      Dynarr_add (dst, ((c >>  6) & 0x3f) | 0x80);
+      Dynarr_add (dst,  (c        & 0x3f) | 0x80);
+    }
+  else if ( c <= 0x3ffffff ) /* five bytes: 0xf8 lead + 4 trail */
+    {
+      Dynarr_add (dst,  (c >> 24) | 0xf8);
+      Dynarr_add (dst, ((c >> 18) & 0x3f) | 0x80);
+      Dynarr_add (dst, ((c >> 12) & 0x3f) | 0x80);
+      Dynarr_add (dst, ((c >>  6) & 0x3f) | 0x80);
+      Dynarr_add (dst,  (c        & 0x3f) | 0x80);
+    }
+  else /* everything larger: six bytes, 0xfc lead + 5 trail */
+    {
+      Dynarr_add (dst,  (c >> 30) | 0xfc);
+      Dynarr_add (dst, ((c >> 24) & 0x3f) | 0x80);
+      Dynarr_add (dst, ((c >> 18) & 0x3f) | 0x80);
+      Dynarr_add (dst, ((c >> 12) & 0x3f) | 0x80);
+      Dynarr_add (dst, ((c >>  6) & 0x3f) | 0x80);
+      Dynarr_add (dst,  (c        & 0x3f) | 0x80);
+    }
+}
+#else
  #define DECODE_ADD_BINARY_CHAR(c, dst)         \
  do {                                           \
    if (BYTE_ASCII_P (c))                                \
  #define DECODE_ADD_BINARY_CHAR(c, dst)         \
  do {                                           \
    if (BYTE_ASCII_P (c))                                \
@@ -1661,6 +1872,7 @@ do {                                              \
        Dynarr_add (dst, c);                     \
      }                                          \
  } while (0)
        Dynarr_add (dst, c);                     \
      }                                          \
  } while (0)
+#endif
  
  #define DECODE_OUTPUT_PARTIAL_CHAR(ch) \
  do {                                   \
  
  #define DECODE_OUTPUT_PARTIAL_CHAR(ch) \
  do {                                   \
@@ -1673,10 +1885,12 @@ do {                                    \
  
  #define DECODE_HANDLE_END_OF_CONVERSION(flags, ch, dst)        \
  do {                                   \
  
  #define DECODE_HANDLE_END_OF_CONVERSION(flags, ch, dst)        \
  do {                                   \
-  DECODE_OUTPUT_PARTIAL_CHAR (ch);     \
-  if ((flags & CODING_STATE_END) &&    \
-      (flags & CODING_STATE_CR))       \
-    Dynarr_add (dst, '\r');            \
+  if (flags & CODING_STATE_END)                \
+    {                                  \
+      DECODE_OUTPUT_PARTIAL_CHAR (ch); \
+      if (flags & CODING_STATE_CR)     \
+       Dynarr_add (dst, '\r');         \
+    }                                  \
  } while (0)
  
  #define DECODING_STREAM_DATA(stream) LSTREAM_TYPE_DATA (stream, decoding)
  } while (0)
  
  #define DECODING_STREAM_DATA(stream) LSTREAM_TYPE_DATA (stream, decoding)
@@ -1684,7 +1898,7 @@ do {                                      \
  struct decoding_stream
  {
    /* Coding system that governs the conversion. */
  struct decoding_stream
  {
    /* Coding system that governs the conversion. */
-  struct Lisp_Coding_System *codesys;
+  Lisp_Coding_System *codesys;
  
    /* Stream that we read the encoded data from or
       write the decoded data to. */
  
    /* Stream that we read the encoded data from or
       write the decoded data to. */
@@ -1718,6 +1932,9 @@ struct decoding_stream
    /* Additional information (the state of the running CCL program)
       used by the CCL decoder. */
    struct ccl_program ccl;
    /* Additional information (the state of the running CCL program)
       used by the CCL decoder. */
    struct ccl_program ccl;
+
+  /* counter for UTF-8 or UCS-4 */
+  unsigned char counter;
  #endif
    struct detection_state decst;
  };
  #endif
    struct detection_state decst;
  };
@@ -1746,7 +1963,7 @@ decoding_marker (Lisp_Object stream, void (*markobj) (Lisp_Object))
       and automatically marked. */
  
    XSETLSTREAM (str_obj, str);
       and automatically marked. */
  
    XSETLSTREAM (str_obj, str);
-  (markobj) (str_obj);
+  markobj (str_obj);
    if (str->imp->marker)
      return (str->imp->marker) (str_obj, markobj);
    else
    if (str->imp->marker)
      return (str->imp->marker) (str_obj, markobj);
    else
@@ -1852,6 +2069,7 @@ reset_decoding_stream (struct decoding_stream *str)
      {
        setup_ccl_program (&str->ccl, CODING_SYSTEM_CCL_DECODE (str->codesys));
      }
      {
        setup_ccl_program (&str->ccl, CODING_SYSTEM_CCL_DECODE (str->codesys));
      }
+  str->counter = 0;
  #endif /* MULE */
    str->flags = str->ch = 0;
  }
  #endif /* MULE */
    str->flags = str->ch = 0;
  }
@@ -1890,9 +2108,11 @@ decoding_closer (Lstream *stream)
      }
    Dynarr_free (str->runoff);
  #ifdef MULE
      }
    Dynarr_free (str->runoff);
  #ifdef MULE
+#ifdef ENABLE_COMPOSITE_CHARS
    if (str->iso2022.composite_chars)
      Dynarr_free (str->iso2022.composite_chars);
  #endif
    if (str->iso2022.composite_chars)
      Dynarr_free (str->iso2022.composite_chars);
  #endif
+#endif
    return Lstream_close (str->other_end);
  }
  
    return Lstream_close (str->other_end);
  }
  
@@ -1909,7 +2129,7 @@ decoding_stream_coding_system (Lstream *stream)
  void
  set_decoding_stream_coding_system (Lstream *lstr, Lisp_Object codesys)
  {
  void
  set_decoding_stream_coding_system (Lstream *lstr, Lisp_Object codesys)
  {
-  struct Lisp_Coding_System *cs = XCODING_SYSTEM (codesys);
+  Lisp_Coding_System *cs = XCODING_SYSTEM (codesys);
    struct decoding_stream *str = DECODING_STREAM_DATA (lstr);
    str->codesys = cs;
    if (CODING_SYSTEM_EOL_TYPE (cs) != EOL_AUTODETECT)
    struct decoding_stream *str = DECODING_STREAM_DATA (lstr);
    str->codesys = cs;
    if (CODING_SYSTEM_EOL_TYPE (cs) != EOL_AUTODETECT)
@@ -2026,8 +2246,15 @@ mule_decode (Lstream *decoding, CONST unsigned char *src,
      case CODESYS_BIG5:
        decode_coding_big5 (decoding, src, dst, n);
        break;
      case CODESYS_BIG5:
        decode_coding_big5 (decoding, src, dst, n);
        break;
+    case CODESYS_UCS4:
+      decode_coding_ucs4 (decoding, src, dst, n);
+      break;
+    case CODESYS_UTF8:
+      decode_coding_utf8 (decoding, src, dst, n);
+      break;
      case CODESYS_CCL:
      case CODESYS_CCL:
-      ccl_driver (&str->ccl, src, dst, n, 0);
+      str->ccl.last_block = str->flags & CODING_STATE_END;
+      ccl_driver (&str->ccl, src, dst, n, 0, CCL_MODE_DECODING);
        break;
      case CODESYS_ISO2022:
        decode_coding_iso2022 (decoding, src, dst, n);
        break;
      case CODESYS_ISO2022:
        decode_coding_iso2022 (decoding, src, dst, n);
@@ -2117,7 +2344,7 @@ BUFFER defaults to the current buffer if unspecified.
  struct encoding_stream
  {
    /* Coding system that governs the conversion. */
  struct encoding_stream
  {
    /* Coding system that governs the conversion. */
-  struct Lisp_Coding_System *codesys;
+  Lisp_Coding_System *codesys;
  
    /* Stream that we read the encoded data from or
       write the decoded data to. */
  
    /* Stream that we read the encoded data from or
       write the decoded data to. */
@@ -2192,7 +2419,7 @@ encoding_marker (Lisp_Object stream, void (*markobj) (Lisp_Object))
       and automatically marked. */
  
    XSETLSTREAM (str_obj, str);
       and automatically marked. */
  
    XSETLSTREAM (str_obj, str);
-  (markobj) (str_obj);
+  markobj (str_obj);
    if (str->imp->marker)
      return (str->imp->marker) (str_obj, markobj);
    else
    if (str->imp->marker)
      return (str->imp->marker) (str_obj, markobj);
    else
@@ -2305,7 +2532,11 @@ reset_encoding_stream (struct encoding_stream *str)
         str->iso2022.register_right = 1;
         str->iso2022.current_charset = Qnil;
         str->iso2022.current_half = 0;
         str->iso2022.register_right = 1;
         str->iso2022.current_charset = Qnil;
         str->iso2022.current_half = 0;
+#ifdef UTF2000
+       str->iso2022.current_char_boundary = 0;
+#else
         str->iso2022.current_char_boundary = 1;
         str->iso2022.current_char_boundary = 1;
+#endif
         break;
        }
      case CODESYS_CCL:
         break;
        }
      case CODESYS_CCL:
@@ -2368,7 +2599,7 @@ encoding_stream_coding_system (Lstream *stream)
  void
  set_encoding_stream_coding_system (Lstream *lstr, Lisp_Object codesys)
  {
  void
  set_encoding_stream_coding_system (Lstream *lstr, Lisp_Object codesys)
  {
-  struct Lisp_Coding_System *cs = XCODING_SYSTEM (codesys);
+  Lisp_Coding_System *cs = XCODING_SYSTEM (codesys);
    struct encoding_stream *str = ENCODING_STREAM_DATA (lstr);
    str->codesys = cs;
    reset_encoding_stream (str);
    struct encoding_stream *str = ENCODING_STREAM_DATA (lstr);
    str->codesys = cs;
    reset_encoding_stream (str);
@@ -2432,8 +2663,15 @@ mule_encode (Lstream *encoding, CONST unsigned char *src,
      case CODESYS_BIG5:
        encode_coding_big5 (encoding, src, dst, n);
        break;
      case CODESYS_BIG5:
        encode_coding_big5 (encoding, src, dst, n);
        break;
+    case CODESYS_UCS4:
+      encode_coding_ucs4 (encoding, src, dst, n);
+      break;
+    case CODESYS_UTF8:
+      encode_coding_utf8 (encoding, src, dst, n);
+      break;
      case CODESYS_CCL:
      case CODESYS_CCL:
-      ccl_driver (&str->ccl, src, dst, n, 0);
+      str->ccl.last_block = str->flags & CODING_STATE_END;
+      ccl_driver (&str->ccl, src, dst, n, 0, CCL_MODE_ENCODING);
        break;
      case CODESYS_ISO2022:
        encode_coding_iso2022 (encoding, src, dst, n);
        break;
      case CODESYS_ISO2022:
        encode_coding_iso2022 (encoding, src, dst, n);
@@ -2517,9 +2755,9 @@ text.  BUFFER defaults to the current buffer if unspecified.
  
  /* Shift-JIS is a coding system encoding three character sets: ASCII, right
     half of JISX0201-Kana, and JISX0208.  An ASCII character is encoded
  
  /* Shift-JIS is a coding system encoding three character sets: ASCII, right
     half of JISX0201-Kana, and JISX0208.  An ASCII character is encoded
-   as is.  A character of JISX0201-Kana (TYPE94 character set) is
+   as is.  A character of JISX0201-Kana (DIMENSION1_CHARS94 character set) is
     encoded by "position-code + 0x80".  A character of JISX0208
     encoded by "position-code + 0x80".  A character of JISX0208
-   (TYPE94x94 character set) is encoded in 2-byte but two
+   (DIMENSION2_CHARS94 character set) is encoded in 2-byte but two
     position-codes are divided and shifted so that it fit in the range
     below.
  
     position-codes are divided and shifted so that it fit in the range
     below.
  
@@ -2576,12 +2814,10 @@ decode_coding_sjis (Lstream *decoding, CONST unsigned char *src,
                     unsigned_char_dynarr *dst, unsigned int n)
  {
    unsigned char c;
                     unsigned_char_dynarr *dst, unsigned int n)
  {
    unsigned char c;
-  unsigned int flags, ch;
-  enum eol_type eol_type;
    struct decoding_stream *str = DECODING_STREAM_DATA (decoding);
    struct decoding_stream *str = DECODING_STREAM_DATA (decoding);
-
-  CODING_STREAM_DECOMPOSE (str, flags, ch);
-  eol_type = str->eol_type;
+  unsigned int flags  = str->flags;
+  unsigned int ch     = str->ch;
+  eol_type_t eol_type = str->eol_type;
  
    while (n--)
      {
  
    while (n--)
      {
@@ -2594,10 +2830,16 @@ decode_coding_sjis (Lstream *decoding, CONST unsigned char *src,
             {
               unsigned char e1, e2;
  
             {
               unsigned char e1, e2;
  
-             Dynarr_add (dst, LEADING_BYTE_JAPANESE_JISX0208);
               DECODE_SJIS (ch, c, e1, e2);
               DECODE_SJIS (ch, c, e1, e2);
+#ifdef UTF2000
+             DECODE_ADD_UCS_CHAR(MAKE_CHAR(Vcharset_japanese_jisx0208,
+                                           e1 & 0x7F,
+                                           e2 & 0x7F), dst);
+#else
+             Dynarr_add (dst, LEADING_BYTE_JAPANESE_JISX0208);
               Dynarr_add (dst, e1);
               Dynarr_add (dst, e2);
               Dynarr_add (dst, e1);
               Dynarr_add (dst, e2);
+#endif
             }
           else
             {
             }
           else
             {
@@ -2613,8 +2855,13 @@ decode_coding_sjis (Lstream *decoding, CONST unsigned char *src,
             ch = c;
           else if (BYTE_SJIS_KATAKANA_P (c))
             {
             ch = c;
           else if (BYTE_SJIS_KATAKANA_P (c))
             {
+#ifdef UTF2000
+             DECODE_ADD_UCS_CHAR(MAKE_CHAR(Vcharset_katakana_jisx0201,
+                                           c & 0x7F, 0), dst);
+#else
               Dynarr_add (dst, LEADING_BYTE_KATAKANA_JISX0201);
               Dynarr_add (dst, c);
               Dynarr_add (dst, LEADING_BYTE_KATAKANA_JISX0201);
               Dynarr_add (dst, c);
+#endif
             }
           else
             DECODE_ADD_BINARY_CHAR (c, dst);
             }
           else
             DECODE_ADD_BINARY_CHAR (c, dst);
@@ -2624,7 +2871,8 @@ decode_coding_sjis (Lstream *decoding, CONST unsigned char *src,
  
    DECODE_HANDLE_END_OF_CONVERSION (flags, ch, dst);
  
  
    DECODE_HANDLE_END_OF_CONVERSION (flags, ch, dst);
  
-  CODING_STREAM_COMPOSE (str, flags, ch);
+  str->flags = flags;
+  str->ch    = ch;
  }
  
  /* Convert internally-formatted data to Shift-JIS. */
  }
  
  /* Convert internally-formatted data to Shift-JIS. */
@@ -2635,15 +2883,85 @@ encode_coding_sjis (Lstream *encoding, CONST unsigned char *src,
  {
    unsigned char c;
    struct encoding_stream *str = ENCODING_STREAM_DATA (encoding);
  {
    unsigned char c;
    struct encoding_stream *str = ENCODING_STREAM_DATA (encoding);
-  unsigned int flags, ch;
-  enum eol_type eol_type;
-
-  CODING_STREAM_DECOMPOSE (str, flags, ch);
-  eol_type = CODING_SYSTEM_EOL_TYPE (str->codesys);
+  unsigned int flags  = str->flags;
+  unsigned int ch     = str->ch;
+  eol_type_t eol_type = CODING_SYSTEM_EOL_TYPE (str->codesys);
+#ifdef UTF2000
+  unsigned char char_boundary = str->iso2022.current_char_boundary;
+#endif
  
    while (n--)
      {
        c = *src++;
  
    while (n--)
      {
        c = *src++;
+#ifdef UTF2000
+      switch (char_boundary)
+       {
+       case 0:
+         if ( c >= 0xfc )
+           {
+             ch = c & 0x01;
+             char_boundary = 5;
+           }
+         else if ( c >= 0xf8 )
+           {
+             ch = c & 0x03;
+             char_boundary = 4;
+           }
+         else if ( c >= 0xf0 )
+           {
+             ch = c & 0x07;
+             char_boundary = 3;
+           }
+         else if ( c >= 0xe0 )
+           {
+             ch = c & 0x0f;
+             char_boundary = 2;
+           }
+         else if ( c >= 0xc0 )
+           {
+             ch = c & 0x1f;
+             char_boundary = 1;
+           }
+         else
+           {
+             ch = 0;
+             if (c == '\n')
+               {
+                 if (eol_type != EOL_LF && eol_type != EOL_AUTODETECT)
+                   Dynarr_add (dst, '\r');
+                 if (eol_type != EOL_CR)
+                   Dynarr_add (dst, c);
+               }
+             else
+               Dynarr_add (dst, c);
+             char_boundary = 0;
+           }
+         break;
+       case 1:
+         ch = ( ch << 6 ) | ( c & 0x3f );
+         {
+           Lisp_Object charset;
+           unsigned int c1, c2, s1, s2;
+           
+           BREAKUP_CHAR (ch, charset, c1, c2);
+           if (EQ(charset, Vcharset_katakana_jisx0201))
+             {
+               Dynarr_add (dst, c1 | 0x80);
+             }
+           else if (EQ(charset, Vcharset_japanese_jisx0208))
+             {
+               ENCODE_SJIS (c1 | 0x80, c2 | 0x80, s1, s2);
+               Dynarr_add (dst, s1);
+               Dynarr_add (dst, s2);
+             }
+         }
+         char_boundary = 0;
+         break;
+       default:
+         ch = ( ch << 6 ) | ( c & 0x3f );
+         char_boundary--;
+       }
+#else
        if (c == '\n')
         {
           if (eol_type != EOL_LF && eol_type != EOL_AUTODETECT)
        if (c == '\n')
         {
           if (eol_type != EOL_LF && eol_type != EOL_AUTODETECT)
@@ -2680,9 +2998,14 @@ encode_coding_sjis (Lstream *encoding, CONST unsigned char *src,
               ch = 0;
             }
         }
               ch = 0;
             }
         }
+#endif
      }
  
      }
  
-  CODING_STREAM_COMPOSE (str, flags, ch);
+  str->flags = flags;
+  str->ch    = ch;
+#ifdef UTF2000
+  str->iso2022.current_char_boundary = char_boundary;
+#endif
  }
  
  DEFUN ("decode-shift-jis-char", Fdecode_shift_jis_char, 1, 1, 0, /*
  }
  
  DEFUN ("decode-shift-jis-char", Fdecode_shift_jis_char, 1, 1, 0, /*
@@ -2748,8 +3071,8 @@ Return the corresponding character code in SHIFT-JIS as a cons of two bytes.
  
     Since the number of characters in Big5 is larger than maximum
     characters in Emacs' charset (96x96), it can't be handled as one
  
     Since the number of characters in Big5 is larger than maximum
     characters in Emacs' charset (96x96), it can't be handled as one
-   charset.  So, in Emacs, Big5 is devided into two: `charset-big5-1'
-   and `charset-big5-2'.  Both <type>s are TYPE94x94.  The former
+   charset.  So, in Emacs, Big5 is divided into two: `charset-big5-1'
+   and `charset-big5-2'.  Both <type>s are DIMENSION2_CHARS94.  The former
     contains frequently used characters and the latter contains less
     frequently used characters.  */
  
     contains frequently used characters and the latter contains less
     frequently used characters.  */
  
@@ -2865,12 +3188,10 @@ decode_coding_big5 (Lstream *decoding, CONST unsigned char *src,
                     unsigned_char_dynarr *dst, unsigned int n)
  {
    unsigned char c;
                     unsigned_char_dynarr *dst, unsigned int n)
  {
    unsigned char c;
-  unsigned int flags, ch;
-  enum eol_type eol_type;
    struct decoding_stream *str = DECODING_STREAM_DATA (decoding);
    struct decoding_stream *str = DECODING_STREAM_DATA (decoding);
-
-  CODING_STREAM_DECOMPOSE (str, flags, ch);
-  eol_type = str->eol_type;
+  unsigned int flags  = str->flags;
+  unsigned int ch     = str->ch;
+  eol_type_t eol_type = str->eol_type;
  
    while (n--)
      {
  
    while (n--)
      {
@@ -2906,7 +3227,8 @@ decode_coding_big5 (Lstream *decoding, CONST unsigned char *src,
  
    DECODE_HANDLE_END_OF_CONVERSION (flags, ch, dst);
  
  
    DECODE_HANDLE_END_OF_CONVERSION (flags, ch, dst);
  
-  CODING_STREAM_COMPOSE (str, flags, ch);
+  str->flags = flags;
+  str->ch    = ch;
  }
  
  /* Convert internally-formatted data to Big5. */
  }
  
  /* Convert internally-formatted data to Big5. */
@@ -2915,13 +3237,12 @@ static void
  encode_coding_big5 (Lstream *encoding, CONST unsigned char *src,
                     unsigned_char_dynarr *dst, unsigned int n)
  {
  encode_coding_big5 (Lstream *encoding, CONST unsigned char *src,
                     unsigned_char_dynarr *dst, unsigned int n)
  {
+#ifndef UTF2000
    unsigned char c;
    struct encoding_stream *str = ENCODING_STREAM_DATA (encoding);
    unsigned char c;
    struct encoding_stream *str = ENCODING_STREAM_DATA (encoding);
-  unsigned int flags, ch;
-  enum eol_type eol_type;
-
-  CODING_STREAM_DECOMPOSE (str, flags, ch);
-  eol_type = CODING_SYSTEM_EOL_TYPE (str->codesys);
+  unsigned int flags  = str->flags;
+  unsigned int ch     = str->ch;
+  eol_type_t eol_type = CODING_SYSTEM_EOL_TYPE (str->codesys);
  
    while (n--)
      {
  
    while (n--)
      {
@@ -2969,7 +3290,9 @@ encode_coding_big5 (Lstream *encoding, CONST unsigned char *src,
        ch = 0;
      }
  
        ch = 0;
      }
  
-  CODING_STREAM_COMPOSE (str, flags, ch);
+  str->flags = flags;
+  str->ch    = ch;
+#endif
  }
  
  
  }
  
  
@@ -2990,7 +3313,7 @@ Return the corresponding character.
    if (BYTE_BIG5_TWO_BYTE_1_P (b1) &&
        BYTE_BIG5_TWO_BYTE_2_P (b2))
      {
    if (BYTE_BIG5_TWO_BYTE_1_P (b1) &&
        BYTE_BIG5_TWO_BYTE_2_P (b2))
      {
-      int leading_byte;
+      Charset_ID leading_byte;
        Lisp_Object charset;
        DECODE_BIG5 (b1, b2, leading_byte, c1, c2);
        charset = CHARSET_BY_LEADING_BYTE (leading_byte);
        Lisp_Object charset;
        DECODE_BIG5 (b1, b2, leading_byte, c1, c2);
        charset = CHARSET_BY_LEADING_BYTE (leading_byte);
@@ -3024,133 +3347,856 @@ Return the corresponding character code in Big5.
  
  \f
  /************************************************************************/
  
  \f
  /************************************************************************/
-/*                           ISO2022 methods                            */
+/*                           UCS-4 methods                              */
+/*                                                                      */
+/*  UCS-4 character codes are implemented as nonnegative integers.      */
+/*                                                                      */
  /************************************************************************/
  
  /************************************************************************/
  
-/* The following note describes the coding system ISO2022 briefly.
-   Since the intention of this note is to help understanding of the
-   programs in this file, some parts are NOT ACCURATE or OVERLY
-   SIMPLIFIED.  For thorough understanding, please refer to the
-   original document of ISO2022.
+/* Array indexed by UCS code; `set-ucs-char' stores the Mule character
+   for a code in the corresponding element. */
+Lisp_Object ucs_to_mule_table[65536];
+/* Table queried with Fget_char_table and updated with Fput_char_table
+   to map a Mule character to its UCS code. */
+Lisp_Object mule_to_ucs_table;
  
  
-   ISO2022 provides many mechanisms to encode several character sets
-   in 7-bit and 8-bit environments.  If one chooses 7-bit environment,
-   all text is encoded by codes of less than 128.  This may make the
-   encoded text a little bit longer, but the text get more stability
-   to pass through several gateways (some of them strip off MSB).
+DEFUN ("set-ucs-char", Fset_ucs_char, 2, 2, 0, /*
+Map UCS-4 code CODE to Mule character CHARACTER.
  
  
-   There are two kind of character sets: control character set and
-   graphic character set.  The former contains control characters such
-   as `newline' and `escape' to provide control functions (control
-   functions are provided also by escape sequence).  The latter
-   contains graphic characters such as 'A' and '-'.  Emacs recognizes
-   two control character sets and many graphic character sets.
+Return T on success, NIL on failure.
+*/
+       (code, character))
+{
+  unsigned int c;
  
  
-   Graphic character sets are classified into one of four types,
-   according to the dimension and number of characters in the set:
-   TYPE94, TYPE96, TYPE94x94, and TYPE96x96.  In addition, each
-   character set is assigned an identification byte, unique for each
-   type, called "final character" (denoted as <F> hereafter).  The <F>
-   of each character set is decided by ECMA(*) when it is registered
-   in ISO.  Code range of <F> is 0x30..0x7F (0x30..0x3F are for
-   private use only).
+  CHECK_CHAR (character);
+  CHECK_INT (code);
+  c = XINT (code);
  
  
-   Note (*): ECMA = European Computer Manufacturers Association
+  /* Bound the index by the number of elements in the table.  A bare
+     sizeof yields the table's size in bytes, which is larger than the
+     element count and would let the store below run past the array.
+     A negative CODE becomes a huge unsigned value and is rejected
+     here as well. */
+  if (c < sizeof (ucs_to_mule_table) / sizeof (ucs_to_mule_table[0]))
+    {
+      ucs_to_mule_table[c] = character;
+      return Qt;
+    }
+  else
+    return Qnil;
+}
  
  
-   Here are examples of graphic character set [NAME(<F>)]:
-       o TYPE94 -- ASCII('B'), right-half-of-JISX0201('I'), ...
-       o TYPE96 -- right-half-of-ISO8859-1('A'), ...
-       o TYPE94x94 -- GB2312('A'), JISX0208('B'), ...
-       o TYPE96x96 -- none for the moment
+/* Return the Mule character for UCS code CODE, or Qnil when CODE falls
+   outside both ranges handled here.
+
+   Codes that index into ucs_to_mule_table are looked up there.  Codes
+   in the 14 consecutive runs of 94 * 94 values starting at 0xe00000
+   are decoded arithmetically: the run number selects a 94x94 charset
+   by final byte (counting up from '@') and the offset inside the run
+   gives the two position codes. */
+static Lisp_Object
+ucs_to_char (unsigned long code)
+{
+  /* Compare against the element count; sizeof alone is the size in
+     bytes and would allow reads beyond the end of the table. */
+  if (code < sizeof (ucs_to_mule_table) / sizeof (ucs_to_mule_table[0]))
+    {
+      return ucs_to_mule_table[code];
+    }
+  /* The upper bound is exclusive: 14 runs cover offsets
+     0 .. 94 * 94 * 14 - 1.  An inclusive bound would accept the first
+     code of a fifteenth run. */
+  else if ((0xe00000 <= code) && (code < 0xe00000 + 94 * 94 * 14))
+    {
+      unsigned int c;
+
+      code -= 0xe00000;
+      c = code % (94 * 94);
+      return make_char
+       (MAKE_CHAR (CHARSET_BY_ATTRIBUTES
+                   (CHARSET_TYPE_94X94, code / (94 * 94) + '@',
+                    CHARSET_LEFT_TO_RIGHT),
+                   c / 94 + 33, c % 94 + 33));
+    }
+  else
+    return Qnil;
+}
  
  
-   A code area (1byte=8bits) is divided into 4 areas, C0, GL, C1, and GR.
-       C0 [0x00..0x1F] -- control character plane 0
-       GL [0x20..0x7F] -- graphic character plane 0
-       C1 [0x80..0x9F] -- control character plane 1
-       GR [0xA0..0xFF] -- graphic character plane 1
+DEFUN ("ucs-char", Fucs_char, 1, 1, 0, /*
+Return Mule character corresponding to UCS code CODE (a positive integer).
+*/
+       (code))
+{
+  unsigned long ucs;
+
+  /* Only a non-negative integer is acceptable; validate before
+     unboxing. */
+  CHECK_NATNUM (code);
+  ucs = XINT (code);
+
+  return ucs_to_char (ucs);
+}
  
  
-   A control character set is directly designated and invoked to C0 or
-   C1 by an escape sequence.  The most common case is that:
-   - ISO646's  control character set is designated/invoked to C0, and
-   - ISO6429's control character set is designated/invoked to C1,
-   and usually these designations/invocations are omitted in encoded
-   text.  In a 7-bit environment, only C0 can be used, and a control
-   character for C1 is encoded by an appropriate escape sequence to
-   fit into the environment.  All control characters for C1 are
-   defined to have corresponding escape sequences.
+DEFUN ("set-char-ucs", Fset_char_ucs, 2, 2, 0, /*
+Map Mule character CHARACTER to UCS code CODE (a positive integer).
+*/
+       (character, code))
+{
+  Lisp_Object result;
+
+  /* #### These checks may be redundant, since Fput_char_table checks
+          its own arguments.  This function is more restrictive on
+          the index argument, but the code argument ought to be
+          checked in a char_table method instead. */
+  CHECK_CHAR (character);
+  CHECK_NATNUM (code);
+
+  result = Fput_char_table (character, code, mule_to_ucs_table);
+  return result;
+}
  
  
-   A graphic character set is at first designated to one of four
-   graphic registers (G0 through G3), then these graphic registers are
-   invoked to GL or GR.  These designations and invocations can be
-   done independently.  The most common case is that G0 is invoked to
-   GL, G1 is invoked to GR, and ASCII is designated to G0.  Usually
-   these invocations and designations are omitted in encoded text.
-   In a 7-bit environment, only GL can be used.
+DEFUN ("char-ucs", Fchar_ucs, 1, 1, 0, /*
+Return the UCS code (a positive integer) corresponding to CHARACTER.
+*/
+       (character))
+{
+  /* The character-to-UCS mapping is held in mule_to_ucs_table. */
+  Lisp_Object ucs = Fget_char_table (character, mule_to_ucs_table);
+
+  return ucs;
+}
  
  
-   When a graphic character set of TYPE94 or TYPE94x94 is invoked to
-   GL, codes 0x20 and 0x7F of the GL area work as control characters
-   SPACE and DEL respectively, and code 0xA0 and 0xFF of GR area
-   should not be used.
+#ifdef UTF2000
+#define decode_ucs4 DECODE_ADD_UCS_CHAR
+#else
+/* Append the internal representation of UCS-4 code CH to DST.  If
+   ucs_to_char finds no character for CH, append <GETA MARK> (U+3013)
+   of JIS X 0208 instead, to signal that the correct character was not
+   found.
+   #### do something more appropriate (use blob?)
+        Danger, Will Robinson!  Data loss.  Should we signal user? */
+static void
+decode_ucs4 (unsigned long ch, unsigned_char_dynarr *dst)
+{
+  Lisp_Object chr = ucs_to_char (ch);
  
  
-   There are two ways of invocation: locking-shift and single-shift.
-   With locking-shift, the invocation lasts until the next different
-   invocation, whereas with single-shift, the invocation works only
-   for the following character and doesn't affect locking-shift.
-   Invocations are done by the following control characters or escape
-   sequences.
+  if (! NILP (chr))
+    {
+      /* Scratch buffer for the bytes of one character. */
+      Bufbyte work[MAX_EMCHAR_LEN];
+      int len;
+
+      /* From here on CH holds the Mule character, not the UCS code. */
+      ch = XCHAR (chr);
+      /* Values below 128 go through simple_set_charptr_emchar, all
+         others through non_ascii_set_charptr_emchar; either call
+         fills WORK and yields the byte count. */
+      len = (ch < 128) ?
+       simple_set_charptr_emchar (work, ch) :
+       non_ascii_set_charptr_emchar (work, ch);
+      Dynarr_add_many (dst, work, len);
+    }
+  else
+    {
+      /* No mapping: emit the JIS X 0208 leading byte followed by the
+         two bytes of the substitute character. */
+      Dynarr_add (dst, LEADING_BYTE_JAPANESE_JISX0208);
+      Dynarr_add (dst, 34 + 128);
+      Dynarr_add (dst, 46 + 128);
+    }
+}
+#endif
  
  
-   ----------------------------------------------------------------------
-   abbrev  function                 cntrl escape seq   description
-   ----------------------------------------------------------------------
-   SI/LS0  (shift-in)               0x0F  none         invoke G0 into GL
-   SO/LS1  (shift-out)              0x0E  none         invoke G1 into GL
-   LS1R    (locking-shift-1 right)   none  ESC '~'      invoke G1 into GR
-   LS2     (locking-shift-2)        none  ESC 'n'      invoke G2 into GL
-   LS2R    (locking-shift-2 right)   none  ESC '}'      invoke G2 into GR
-   LS3     (locking-shift-3)        none  ESC 'o'      invoke G3 into GL
-   LS3R    (locking-shift 3 right)   none  ESC '|'      invoke G3 into GR
-   SS2     (single-shift-2)         0x8E  ESC 'N'      invoke G2 for one char
-   SS3     (single-shift-3)         0x8F  ESC 'O'      invoke G3 for one char
-   ----------------------------------------------------------------------
-   The first four are for locking-shift.  Control characters for these
-   functions are defined by macros ISO_CODE_XXX in `coding.h'.
+/* Return the UCS-4 code for the character of CHARSET whose position
+   bytes are H and L (only their low seven bits are used).
+
+   The character is first looked up in mule_to_ucs_table.  When the
+   table holds no integer for it, characters of a two-dimensional
+   94-character charset whose final byte is in '@' .. 0x7e are given
+   a computed code at or above 0xe00000; every other character yields
+   '?'. */
+static unsigned long
+mule_char_to_ucs4 (Lisp_Object charset,
+                  unsigned char h, unsigned char l)
+{
+  Lisp_Object code
+    = Fget_char_table (make_char (MAKE_CHAR (charset, h & 127, l & 127)),
+                      mule_to_ucs_table);
  
  
-   Designations are done by the following escape sequences.
-   ----------------------------------------------------------------------
-   escape sequence     description
-   ----------------------------------------------------------------------
-   ESC '(' <F>         designate TYPE94<F> to G0
-   ESC ')' <F>         designate TYPE94<F> to G1
-   ESC '*' <F>         designate TYPE94<F> to G2
-   ESC '+' <F>         designate TYPE94<F> to G3
-   ESC ',' <F>         designate TYPE96<F> to G0 (*)
-   ESC '-' <F>         designate TYPE96<F> to G1
-   ESC '.' <F>         designate TYPE96<F> to G2
-   ESC '/' <F>         designate TYPE96<F> to G3
-   ESC '$' '(' <F>     designate TYPE94x94<F> to G0 (**)
-   ESC '$' ')' <F>     designate TYPE94x94<F> to G1
-   ESC '$' '*' <F>     designate TYPE94x94<F> to G2
-   ESC '$' '+' <F>     designate TYPE94x94<F> to G3
-   ESC '$' ',' <F>     designate TYPE96x96<F> to G0 (*)
-   ESC '$' '-' <F>     designate TYPE96x96<F> to G1
-   ESC '$' '.' <F>     designate TYPE96x96<F> to G2
-   ESC '$' '/' <F>     designate TYPE96x96<F> to G3
-   ----------------------------------------------------------------------
-   In this list, "TYPE94<F>" means a graphic character set of type TYPE94
-   and final character <F>, and etc.
+  /* An integer in the table is the registered UCS code. */
+  if (INTP (code))
+    {
+      return XINT (code);
+    }
+  else if ( (XCHARSET_DIMENSION (charset) == 2) &&
+           (XCHARSET_CHARS (charset) == 94) )
+    {
+      unsigned char final = XCHARSET_FINAL (charset);
  
  
-   Note (*): Although these designations are not allowed in ISO2022,
-   Emacs accepts them on decoding, and produces them on encoding
-   TYPE96 or TYPE96x96 character set in a coding system which is
-   characterized as 7-bit environment, non-locking-shift, and
-   non-single-shift.
+      if ( ('@' <= final) && (final < 0x7f) )
+       {
+         /* Each final byte owns a run of 94 * 94 codes starting at
+            0xe00000; the position codes, rebased from 33, index into
+            that run. */
+         return 0xe00000 + (final - '@') * 94 * 94
+           + ((h & 127) - 33) * 94 + (l & 127) - 33;
+       }
+      else
+       {
+         return '?';
+       }
+    }
+  else
+    {
+      return '?';
+    }
+}
  
  
-   Note (**): If <F> is '@', 'A', or 'B', the intermediate character
-   '(' can be omitted.  We call this as "short-form" here after.
+/* Append to DST the four bytes, most significant first, of the code
+   that mule_char_to_ucs4 returns for CHARSET and the bytes H and L. */
+static void
+encode_ucs4 (Lisp_Object charset,
+            unsigned char h, unsigned char l, unsigned_char_dynarr *dst)
+{
+  unsigned long ucs = mule_char_to_ucs4 (charset, h, l);
+  int shift;
+
+  /* Walk from bits 31..24 down to bits 7..0, one byte per step. */
+  for (shift = 24; shift >= 0; shift -= 8)
+    Dynarr_add (dst, (ucs >> shift) & 255);
+}
  
  
-   Now you may notice that there are a lot of ways for encoding the
+/* Feed the N bytes at SRC to the UCS-4 detection state in ST.
+   st->ucs4.in_byte is stepped 0 -> 1 -> 2 -> 3 -> 0 as bytes are
+   consumed.  Return 0 as soon as a byte of 128 or more arrives while
+   in_byte is 0; otherwise return CODING_CATEGORY_UCS4_MASK. */
+static int
+detect_coding_ucs4 (struct detection_state *st, CONST unsigned char *src,
+                   unsigned int n)
+{
+  CONST unsigned char *end = src + n;
+
+  for (; src < end; src++)
+    {
+      int byte = *src;
+
+      if (st->ucs4.in_byte == 0)
+       {
+         /* A byte with the high bit set in this position rules
+            UCS-4 out. */
+         if (byte >= 128)
+           return 0;
+         st->ucs4.in_byte++;
+       }
+      else if (st->ucs4.in_byte == 3)
+       st->ucs4.in_byte = 0;
+      else
+       st->ucs4.in_byte++;
+    }
+
+  return CODING_CATEGORY_UCS4_MASK;
+}
+
+/* Convert the N bytes of UCS-4 data at SRC and append the decoded
+   characters to DST.  Each character is assembled from four
+   consecutive bytes, most significant first.  The partially assembled
+   value and the number of bytes still expected are saved in the
+   stream (str->ch and str->counter) so that a character may straddle
+   two calls. */
+static void
+decode_coding_ucs4 (Lstream *decoding, CONST unsigned char *src,
+                   unsigned_char_dynarr *dst, unsigned int n)
+{
+  struct decoding_stream *str = DECODING_STREAM_DATA (decoding);
+  unsigned int flags = str->flags;
+  unsigned int ch    = str->ch;
+  unsigned char counter = str->counter;
+
+  while (n--)
+    {
+      unsigned char c = *src++;
+      switch (counter)
+       {
+       case 0:
+         /* First byte of a character: three more to come. */
+         ch = c;
+         counter = 3;
+         break;
+       case 1:
+         /* Last byte: the character is complete, emit it. */
+         decode_ucs4 ( ( ch << 8 ) | c, dst);
+         ch = 0;
+         counter = 0;
+         break;
+       default:
+         ch = ( ch << 8 ) | c;
+         counter--;
+       }
+    }
+  /* CODING_STATE_END is a bit of FLAGS.  Testing it against COUNTER
+     made the flush depend on how many bytes of the current character
+     were still outstanding instead of on end of input, so a character
+     split across two calls could be discarded as a partial one. */
+  if (flags & CODING_STATE_END)
+    DECODE_OUTPUT_PARTIAL_CHAR (ch);
+
+  str->flags = flags;
+  str->ch    = ch;
+  str->counter = counter;
+}
+
+static void
+encode_coding_ucs4 (Lstream *encoding, CONST unsigned char *src,
+                   unsigned_char_dynarr *dst, unsigned int n)
+{ /* Walk the N bytes at SRC and call encode_ucs4 () once per complete
+     character, with DST as the output array.  The pending byte (CH),
+     the current charset and the character-boundary flag are loaded
+     from, and stored back into, the encoding stream of ENCODING, so a
+     character may be split across calls.  The whole body sits inside
+     #ifndef UTF2000: when UTF2000 is defined this function does
+     nothing.  */
+#ifndef UTF2000
+  struct encoding_stream *str = ENCODING_STREAM_DATA (encoding);
+  unsigned int flags = str->flags;
+  unsigned int ch = str->ch;
+  unsigned char char_boundary = str->iso2022.current_char_boundary;
+  Lisp_Object charset = str->iso2022.current_charset;
+
+#ifdef ENABLE_COMPOSITE_CHARS
+  /* flags for handling composite chars.  We do a little switcharoo
+     on the source while we're outputting the composite char. */
+  unsigned int saved_n = 0;
+  CONST unsigned char *saved_src = NULL;
+  int in_composite = 0;
+
+ back_to_square_n:
+#endif
+
+  while (n--)
+    {
+      unsigned char c = *src++;
+
+      if (BYTE_ASCII_P (c))
+       {               /* Processing ASCII character */
+         ch = 0;
+         encode_ucs4 (Vcharset_ascii, c, 0, dst);
+         char_boundary = 1;
+       }
+      else if (BUFBYTE_LEADING_BYTE_P (c) || BUFBYTE_LEADING_BYTE_P (ch))
+       { /* Processing Leading Byte.  The test on CH presumably catches
+            the byte that follows a prefix byte remembered below.  */
+         ch = 0;
+         charset = CHARSET_BY_LEADING_BYTE (c);
+         if (LEADING_BYTE_PREFIX_P(c))
+           ch = c; /* remember the prefix byte */
+         char_boundary = 0;
+       }
+      else
+       {                       /* Processing Non-ASCII character */
+         char_boundary = 1;
+         if (EQ (charset, Vcharset_control_1))
+           {
+             encode_ucs4 (Vcharset_control_1, c, 0, dst);
+           }
+         else
+           {
+             switch (XCHARSET_REP_BYTES (charset))
+               {
+               case 2:
+                 encode_ucs4 (charset, c, 0, dst);
+                 break;
+               case 3:
+                 if (XCHARSET_PRIVATE_P (charset))
+                   {
+                     encode_ucs4 (charset, c, 0, dst);
+                     ch = 0;
+                   }
+                 else if (ch) /* second position byte; CH is the first */
+                   {
+#ifdef ENABLE_COMPOSITE_CHARS
+                     if (EQ (charset, Vcharset_composite))
+                       {
+                         if (in_composite)
+                           {
+                             /* #### Bother! We don't know how to
+                                handle this yet. */
+                             Dynarr_add (dst, 0);
+                             Dynarr_add (dst, 0);
+                             Dynarr_add (dst, 0);
+                             Dynarr_add (dst, '~');
+                           }
+                         else
+                           {
+                             Emchar emch = MAKE_CHAR (Vcharset_composite,
+                                                      ch & 0x7F, c & 0x7F);
+                             Lisp_Object lstr = composite_char_string (emch);
+                             saved_n = n;
+                             saved_src = src;
+                             in_composite = 1;
+                             src = XSTRING_DATA   (lstr);
+                             n   = XSTRING_LENGTH (lstr);
+                           }
+                       }
+                     else
+#endif /* ENABLE_COMPOSITE_CHARS */
+                       {
+                         encode_ucs4(charset, ch, c, dst);
+                       }
+                     ch = 0;
+                   }
+                 else
+                   {
+                     ch = c; /* first position byte; wait for the next */
+                     char_boundary = 0;
+                   }
+                 break;
+               case 4:
+                 if (ch)
+                   {
+                     encode_ucs4 (charset, ch, c, dst);
+                     ch = 0;
+                   }
+                 else
+                   {
+                     ch = c; /* first position byte; wait for the next */
+                     char_boundary = 0;
+                   }
+                 break;
+               default:
+                 abort ();
+               }
+           }
+       }
+    }
+
+#ifdef ENABLE_COMPOSITE_CHARS
+  if (in_composite) /* The loop just consumed a composite character's
+                       string; resume the real input saved earlier.  */
+    {
+      n = saved_n;
+      src = saved_src;
+      in_composite = 0;
+      goto back_to_square_n; /* Wheeeeeeeee ..... */
+    }
+#endif /* ENABLE_COMPOSITE_CHARS */
+
+  str->flags = flags;
+  str->ch = ch;
+  str->iso2022.current_char_boundary = char_boundary;
+  str->iso2022.current_charset = charset;
+
+  /* Verbum caro factum est! */
+#endif
+}
+
+\f
+/************************************************************************/
+/*                           UTF-8 methods                              */
+/************************************************************************/
+
+/* Examine the N bytes at SRC and report whether they are still
+   consistent with UTF-8.  Returns CODING_CATEGORY_UTF8_MASK when
+   nothing ruled the encoding out, and 0 as soon as an impossible byte
+   is seen.  ST->utf8.in_byte holds the number of trailing bytes the
+   current sequence still expects, so a sequence may span calls.  */
+static int
+detect_coding_utf8 (struct detection_state *st, CONST unsigned char *src,
+                   unsigned int n)
+{
+  unsigned int i;
+
+  for (i = 0; i < n; i++)
+    {
+      unsigned char byte = src[i];
+
+      if (st->utf8.in_byte != 0)
+        {
+          /* Inside a sequence only 10xxxxxx bytes are acceptable.  */
+          if ((byte & 0xc0) != 0x80)
+            return 0;
+          st->utf8.in_byte--;
+          continue;
+        }
+
+      /* At a character boundary.  */
+      if (byte == ISO_CODE_ESC || byte == ISO_CODE_SI || byte == ISO_CODE_SO)
+        return 0;
+
+      if (byte >= 0xfc)
+        st->utf8.in_byte = 5;
+      else if (byte >= 0xf8)
+        st->utf8.in_byte = 4;
+      else if (byte >= 0xf0)
+        st->utf8.in_byte = 3;
+      else if (byte >= 0xe0)
+        st->utf8.in_byte = 2;
+      else if (byte >= 0xc0)
+        st->utf8.in_byte = 1;
+      else if (byte >= 0x80)
+        return 0;   /* trailing byte with no lead byte before it */
+    }
+
+  return CODING_CATEGORY_UTF8_MASK;
+}
+
+/* Decode N bytes of UTF-8 text from SRC, appending the result to DST.
+
+   A lead byte fixes how many trailing bytes follow (one to five) and
+   supplies the top bits of the value; each trailing byte adds six more
+   bits, and the finished value is passed to decode_ucs4 ().  Bytes
+   below 0xc0 met at a character boundary go through the EOL handling
+   macro and are then decoded on their own.  The partial value (CH),
+   the number of trailing bytes still expected (COUNTER) and FLAGS are
+   loaded from, and stored back into, the decoding stream of DECODING.
+
+   The local names and the label below are kept as they are because the
+   decode macros may refer to them.  */
+static void
+decode_coding_utf8 (Lstream *decoding, CONST unsigned char *src,
+                   unsigned_char_dynarr *dst, unsigned int n)
+{
+  struct decoding_stream *str = DECODING_STREAM_DATA (decoding);
+  unsigned int flags  = str->flags;
+  unsigned int ch     = str->ch;
+  eol_type_t eol_type = str->eol_type;
+  unsigned char counter = str->counter;
+
+  while (n--)
+    {
+      unsigned char c = *src++;
+
+      if (counter != 0)
+        {
+          /* Trailing byte: shift in its six payload bits.  */
+          ch = (ch << 6) | (c & 0x3f);
+          if (--counter == 0)
+            {
+              decode_ucs4 (ch, dst);
+              ch = 0;
+            }
+        }
+      else if (c >= 0xc0)
+        {
+          /* Lead byte: work out how many trailing bytes follow.  */
+          if (c >= 0xfc)
+            counter = 5;
+          else if (c >= 0xf8)
+            counter = 4;
+          else if (c >= 0xf0)
+            counter = 3;
+          else if (c >= 0xe0)
+            counter = 2;
+          else
+            counter = 1;
+
+          /* The payload mask narrows by one bit per extra trailing
+             byte: 0x1f, 0x0f, 0x07, 0x03, 0x01.  */
+          ch = c & (0x3f >> counter);
+        }
+      else
+        {
+          DECODE_HANDLE_EOL_TYPE (eol_type, c, flags, dst);
+          decode_ucs4 (c, dst);
+        }
+      /* Nothing in this function jumps here directly; the label is
+         presumably a target for DECODE_HANDLE_EOL_TYPE.  */
+    label_continue_loop:;
+    }
+
+  if (flags & CODING_STATE_END)
+    DECODE_OUTPUT_PARTIAL_CHAR (ch);
+
+  str->flags = flags;
+  str->ch    = ch;
+  str->counter = counter;
+}
+
+#ifndef UTF2000
+/* Append to DST the UTF-8 form of the character identified by CHARSET
+   and the bytes H and L.  The code value comes from
+   mule_char_to_ucs4 (); values up to 0x7f are emitted as a single
+   byte, larger ones as a marked lead byte followed by one to five
+   trailing bytes that each carry six bits of the value.  */
+static void
+encode_utf8 (Lisp_Object charset,
+            unsigned char h, unsigned char l, unsigned_char_dynarr *dst)
+{
+  unsigned long code = mule_char_to_ucs4 (charset, h, l);
+  unsigned char lead_mark;
+  int trailing;
+
+  if (code <= 0x7f)
+    {
+      Dynarr_add (dst, code);
+      return;
+    }
+
+  /* Pick the sequence length and the matching lead-byte marker.  */
+  if (code <= 0x7ff)
+    {
+      lead_mark = 0xc0;
+      trailing = 1;
+    }
+  else if (code <= 0xffff)
+    {
+      lead_mark = 0xe0;
+      trailing = 2;
+    }
+  else if (code <= 0x1fffff)
+    {
+      lead_mark = 0xf0;
+      trailing = 3;
+    }
+  else if (code <= 0x3ffffff)
+    {
+      lead_mark = 0xf8;
+      trailing = 4;
+    }
+  else
+    {
+      lead_mark = 0xfc;
+      trailing = 5;
+    }
+
+  /* Lead byte first, then the trailing bytes from the most significant
+     six-bit group down to the least.  */
+  Dynarr_add (dst, (code >> (6 * trailing)) | lead_mark);
+  while (trailing > 0)
+    {
+      trailing--;
+      Dynarr_add (dst, ((code >> (6 * trailing)) & 0x3f) | 0x80);
+    }
+}
+#endif
+
+static void
+encode_coding_utf8 (Lstream *encoding, CONST unsigned char *src,
+                   unsigned_char_dynarr *dst, unsigned int n)
+{ /* Encode the N bytes at SRC as UTF-8, appending to DST.  State is
+     loaded from, and stored back into, the encoding stream of ENCODING.
+
+     With UTF2000 defined, every input byte is copied to DST as is,
+     except that a '\n' met at a character boundary is rewritten
+     according to the coding system's EOL type; CHAR_BOUNDARY then
+     counts the trailing bytes the current multibyte sequence still
+     expects.  Otherwise the input is parsed into a charset plus
+     position bytes and each complete character goes through
+     encode_utf8 (); CHAR_BOUNDARY is then 1 after a complete character
+     and 0 inside one.  */
+  struct encoding_stream *str = ENCODING_STREAM_DATA (encoding);
+  unsigned int flags  = str->flags;
+  unsigned int ch     = str->ch;
+  eol_type_t eol_type = CODING_SYSTEM_EOL_TYPE (str->codesys);
+  unsigned char char_boundary = str->iso2022.current_char_boundary;
+#ifdef UTF2000
+
+  while (n--)
+    {
+      unsigned char c = *src++;          
+      switch (char_boundary) /* trailing bytes still expected */
+       {
+       case 0: /* at a character boundary: C starts a new character */
+         if ( c >= 0xfc )
+           {
+             Dynarr_add (dst, c);
+             char_boundary = 5;
+           }
+         else if ( c >= 0xf8 )
+           {
+             Dynarr_add (dst, c);
+             char_boundary = 4;
+           }
+         else if ( c >= 0xf0 )
+           {
+             Dynarr_add (dst, c);
+             char_boundary = 3;
+           }
+         else if ( c >= 0xe0 )
+           {
+             Dynarr_add (dst, c);
+             char_boundary = 2;
+           }
+         else if ( c >= 0xc0 )
+           {
+             Dynarr_add (dst, c);
+             char_boundary = 1;
+           }
+         else
+           {
+             if (c == '\n') /* emit '\r' unless the EOL type is LF or
+                               autodetect, then '\n' unless it is CR */
+               {
+                 if (eol_type != EOL_LF && eol_type != EOL_AUTODETECT)
+                   Dynarr_add (dst, '\r');
+                 if (eol_type != EOL_CR)
+                   Dynarr_add (dst, c);
+               }
+             else
+               Dynarr_add (dst, c);
+             char_boundary = 0;
+           }
+         break;
+       case 1: /* last trailing byte of the current character */
+         Dynarr_add (dst, c);
+         char_boundary = 0;
+         break;
+       default:
+         Dynarr_add (dst, c);
+         char_boundary--;
+       }
+    }
+#else /* not UTF2000 */
+  Lisp_Object charset = str->iso2022.current_charset;
+
+#ifdef ENABLE_COMPOSITE_CHARS
+  /* flags for handling composite chars.  We do a little switcharoo
+     on the source while we're outputting the composite char. */
+  unsigned int saved_n = 0;
+  CONST unsigned char *saved_src = NULL;
+  int in_composite = 0;
+
+ back_to_square_n:
+#endif /* ENABLE_COMPOSITE_CHARS */
+  
+  while (n--)
+    {
+      unsigned char c = *src++;
+
+      if (BYTE_ASCII_P (c))
+       {               /* Processing ASCII character */
+         ch = 0;
+         if (c == '\n') /* emit '\r' unless the EOL type is LF or
+                           autodetect, then '\n' unless it is CR */
+           {
+             if (eol_type != EOL_LF && eol_type != EOL_AUTODETECT)
+               Dynarr_add (dst, '\r');
+             if (eol_type != EOL_CR)
+               Dynarr_add (dst, c);
+           }
+         else
+           encode_utf8 (Vcharset_ascii, c, 0, dst);
+         char_boundary = 1;
+       }
+      else if (BUFBYTE_LEADING_BYTE_P (c) || BUFBYTE_LEADING_BYTE_P (ch))
+       { /* Processing Leading Byte.  The test on CH presumably catches
+            the byte that follows a prefix byte remembered below.  */
+         ch = 0;
+         charset = CHARSET_BY_LEADING_BYTE (c);
+         if (LEADING_BYTE_PREFIX_P(c))
+           ch = c; /* remember the prefix byte */
+         char_boundary = 0;
+       }
+      else
+       {                       /* Processing Non-ASCII character */
+         char_boundary = 1;
+         if (EQ (charset, Vcharset_control_1))
+           {
+             encode_utf8 (Vcharset_control_1, c, 0, dst);
+           }
+         else
+           {
+             switch (XCHARSET_REP_BYTES (charset))
+               {
+               case 2:
+                 encode_utf8 (charset, c, 0, dst);
+                 break;
+               case 3:
+                 if (XCHARSET_PRIVATE_P (charset))
+                   {
+                     encode_utf8 (charset, c, 0, dst);
+                     ch = 0;
+                   }
+                 else if (ch) /* second position byte; CH is the first */
+                   {
+#ifdef ENABLE_COMPOSITE_CHARS
+                     if (EQ (charset, Vcharset_composite))
+                       {
+                         if (in_composite)
+                           {
+                             /* #### Bother! We don't know how to
+                                handle this yet. */
+                             encode_utf8 (Vcharset_ascii, '~', 0, dst);
+                           }
+                         else
+                           {
+                             Emchar emch = MAKE_CHAR (Vcharset_composite,
+                                                      ch & 0x7F, c & 0x7F);
+                             Lisp_Object lstr = composite_char_string (emch);
+                             saved_n = n;
+                             saved_src = src;
+                             in_composite = 1;
+                             src = XSTRING_DATA   (lstr);
+                             n   = XSTRING_LENGTH (lstr);
+                           }
+                       }
+                     else
+#endif /* ENABLE_COMPOSITE_CHARS */
+                       {
+                         encode_utf8 (charset, ch, c, dst);
+                       }
+                     ch = 0;
+                   }
+                 else
+                   {
+                     ch = c; /* first position byte; wait for the next */
+                     char_boundary = 0;
+                   }
+                 break;
+               case 4:
+                 if (ch)
+                   {
+                     encode_utf8 (charset, ch, c, dst);
+                     ch = 0;
+                   }
+                 else
+                   {
+                     ch = c; /* first position byte; wait for the next */
+                     char_boundary = 0;
+                   }
+                 break;
+               default:
+                 abort ();
+               }
+           }
+       }
+    }
+
+#ifdef ENABLE_COMPOSITE_CHARS
+  if (in_composite) /* The loop just consumed a composite character's
+                       string; resume the real input saved earlier.  */
+    {
+      n = saved_n;
+      src = saved_src;
+      in_composite = 0;
+      goto back_to_square_n; /* Wheeeeeeeee ..... */
+    }
+#endif
+
+#endif /* not UTF2000 */
+  str->flags = flags;
+  str->ch    = ch;
+  str->iso2022.current_char_boundary = char_boundary;
+#ifndef UTF2000
+  str->iso2022.current_charset = charset;
+#endif
+
+  /* Verbum caro factum est! */
+}
+
+\f
+/************************************************************************/
+/*                           ISO2022 methods                            */
+/************************************************************************/
+
+/* The following note describes the coding system ISO2022 briefly.
+   Since the intention of this note is to help understand the
+   functions in this file, some parts are NOT ACCURATE or OVERLY
+   SIMPLIFIED.  For thorough understanding, please refer to the
+   original document of ISO2022.
+
+   ISO2022 provides many mechanisms to encode several character sets
+   in 7-bit and 8-bit environments.  For 7-bit environments, all text
+   is encoded using bytes less than 128.  This may make the encoded
+   text a little bit longer, but the text passes more easily through
+   several gateways, some of which strip off MSB (Most Significant Bit).
+
+   There are two kinds of character sets: control character set and
+   graphic character set.  The former contains control characters such
+   as `newline' and `escape' to provide control functions (control
+   functions are also provided by escape sequences).  The latter
+   contains graphic characters such as 'A' and '-'.  Emacs recognizes
+   two control character sets and many graphic character sets.
+
+   Graphic character sets are classified into one of the following
+   four classes, according to the number of bytes (DIMENSION) and
+   number of characters in one dimension (CHARS) of the set:
+   - DIMENSION1_CHARS94
+   - DIMENSION1_CHARS96
+   - DIMENSION2_CHARS94
+   - DIMENSION2_CHARS96
+
+   In addition, each character set is assigned an identification tag,
+   unique for each set, called "final character" (denoted as <F>
+   hereafter).  The <F> of each character set is decided by ECMA(*)
+   when it is registered in ISO.  The code range of <F> is 0x30..0x7F
+   (0x30..0x3F are for private use only).
+
+   Note (*): ECMA = European Computer Manufacturers Association
+
+   Here are examples of graphic character set [NAME(<F>)]:
+       o DIMENSION1_CHARS94 -- ASCII('B'), right-half-of-JISX0201('I'), ...
+       o DIMENSION1_CHARS96 -- right-half-of-ISO8859-1('A'), ...
+       o DIMENSION2_CHARS94 -- GB2312('A'), JISX0208('B'), ...
+       o DIMENSION2_CHARS96 -- none for the moment
+
+   A code area (1 byte = 8 bits) is divided into 4 areas, C0, GL, C1, and GR.
+       C0 [0x00..0x1F] -- control character plane 0
+       GL [0x20..0x7F] -- graphic character plane 0
+       C1 [0x80..0x9F] -- control character plane 1
+       GR [0xA0..0xFF] -- graphic character plane 1
+
+   A control character set is directly designated and invoked to C0 or
+   C1 by an escape sequence.  The most common case is that:
+   - ISO646's  control character set is designated/invoked to C0, and
+   - ISO6429's control character set is designated/invoked to C1,
+   and usually these designations/invocations are omitted in encoded
+   text.  In a 7-bit environment, only C0 can be used, and a control
+   character for C1 is encoded by an appropriate escape sequence to
+   fit into the environment.  All control characters for C1 are
+   defined to have corresponding escape sequences.
+
+   A graphic character set is at first designated to one of four
+   graphic registers (G0 through G3), then these graphic registers are
+   invoked to GL or GR.  These designations and invocations can be
+   done independently.  The most common case is that G0 is invoked to
+   GL, G1 is invoked to GR, and ASCII is designated to G0.  Usually
+   these invocations and designations are omitted in encoded text.
+   In a 7-bit environment, only GL can be used.
+
+   When a graphic character set of CHARS94 is invoked to GL, codes
+   0x20 and 0x7F of the GL area work as control characters SPACE and
+   DEL respectively, and codes 0xA0 and 0xFF of the GR area should not
+   be used.
+
+   There are two ways of invocation: locking-shift and single-shift.
+   With locking-shift, the invocation lasts until the next different
+   invocation, whereas with single-shift, the invocation affects the
+   following character only and doesn't affect the locking-shift
+   state.  Invocations are done by the following control characters or
+   escape sequences:
+
+   ----------------------------------------------------------------------
+   abbrev  function                 cntrl escape seq   description
+   ----------------------------------------------------------------------
+   SI/LS0  (shift-in)               0x0F  none         invoke G0 into GL
+   SO/LS1  (shift-out)              0x0E  none         invoke G1 into GL
+   LS2     (locking-shift-2)        none  ESC 'n'      invoke G2 into GL
+   LS3     (locking-shift-3)        none  ESC 'o'      invoke G3 into GL
+   LS1R    (locking-shift-1 right)   none  ESC '~'      invoke G1 into GR (*)
+   LS2R    (locking-shift-2 right)   none  ESC '}'      invoke G2 into GR (*)
+   LS3R    (locking-shift 3 right)   none  ESC '|'      invoke G3 into GR (*)
+   SS2     (single-shift-2)         0x8E  ESC 'N'      invoke G2 for one char
+   SS3     (single-shift-3)         0x8F  ESC 'O'      invoke G3 for one char
+   ----------------------------------------------------------------------
+   (*) These are not used by any known coding system.
+
+   Control characters for these functions are defined by macros
+   ISO_CODE_XXX in `coding.h'.
+
+   Designations are done by the following escape sequences:
+   ----------------------------------------------------------------------
+   escape sequence     description
+   ----------------------------------------------------------------------
+   ESC '(' <F>         designate DIMENSION1_CHARS94<F> to G0
+   ESC ')' <F>         designate DIMENSION1_CHARS94<F> to G1
+   ESC '*' <F>         designate DIMENSION1_CHARS94<F> to G2
+   ESC '+' <F>         designate DIMENSION1_CHARS94<F> to G3
+   ESC ',' <F>         designate DIMENSION1_CHARS96<F> to G0 (*)
+   ESC '-' <F>         designate DIMENSION1_CHARS96<F> to G1
+   ESC '.' <F>         designate DIMENSION1_CHARS96<F> to G2
+   ESC '/' <F>         designate DIMENSION1_CHARS96<F> to G3
+   ESC '$' '(' <F>     designate DIMENSION2_CHARS94<F> to G0 (**)
+   ESC '$' ')' <F>     designate DIMENSION2_CHARS94<F> to G1
+   ESC '$' '*' <F>     designate DIMENSION2_CHARS94<F> to G2
+   ESC '$' '+' <F>     designate DIMENSION2_CHARS94<F> to G3
+   ESC '$' ',' <F>     designate DIMENSION2_CHARS96<F> to G0 (*)
+   ESC '$' '-' <F>     designate DIMENSION2_CHARS96<F> to G1
+   ESC '$' '.' <F>     designate DIMENSION2_CHARS96<F> to G2
+   ESC '$' '/' <F>     designate DIMENSION2_CHARS96<F> to G3
+   ----------------------------------------------------------------------
+
+   In this list, "DIMENSION1_CHARS94<F>" means a graphic character set
+   of dimension 1, chars 94, and final character <F>, etc...
+
+   Note (*): Although these designations are not allowed in ISO2022,
+   Emacs accepts them on decoding, and produces them on encoding
+   CHARS96 character sets in a coding system which is characterized as
+   7-bit environment, non-locking-shift, and non-single-shift.
+
+   Note (**): If <F> is '@', 'A', or 'B', the intermediate character
+   '(' can be omitted.  We refer to this as "short-form" hereafter.
+
+   Now you may notice that there are a lot of ways for encoding the
     same multilingual text in ISO2022.  Actually, there exist many
     same multilingual text in ISO2022.  Actually, there exist many
-   coding systems such as Compound Text (used in X's inter client
+   coding systems such as Compound Text (used in X11's inter client
     communication, ISO-2022-JP (used in Japanese internet), ISO-2022-KR
     (used in Korean internet), EUC (Extended UNIX Code, used in Asian
     localized platforms), and all of these are variants of ISO2022.
     communication, ISO-2022-JP (used in Japanese internet), ISO-2022-KR
     (used in Korean internet), EUC (Extended UNIX Code, used in Asian
     localized platforms), and all of these are variants of ISO2022.
@@ -3159,19 +4205,19 @@ Return the corresponding character code in Big5.
     sequences: ISO6429's direction specification and Emacs' private
     sequence for specifying character composition.
  
     sequences: ISO6429's direction specification and Emacs' private
     sequence for specifying character composition.
  
-   ISO6429's direction specification takes the following format:
+   ISO6429's direction specification takes the following form:
         o CSI ']'      -- end of the current direction
         o CSI '0' ']'  -- end of the current direction
         o CSI '1' ']'  -- start of left-to-right text
         o CSI '2' ']'  -- start of right-to-left text
     The control character CSI (0x9B: control sequence introducer) is
         o CSI ']'      -- end of the current direction
         o CSI '0' ']'  -- end of the current direction
         o CSI '1' ']'  -- start of left-to-right text
         o CSI '2' ']'  -- start of right-to-left text
     The control character CSI (0x9B: control sequence introducer) is
-   abbreviated to the escape sequence ESC '[' in 7-bit environment.
+   abbreviated to the escape sequence ESC '[' in a 7-bit environment.
  
  
-   Character composition specification takes the following format:
+   Character composition specification takes the following form:
         o ESC '0' -- start character composition
         o ESC '1' -- end character composition
         o ESC '0' -- start character composition
         o ESC '1' -- end character composition
-   Since these are not standard escape sequences of any ISO, the use
-   of them for these meanings is restricted to Emacs only.  */
+   Since these are not standard escape sequences of any ISO standard,
+   their use with these meanings is restricted to Emacs only.  */
  
  static void
  reset_iso2022 (Lisp_Object coding_system, struct iso2022_decoder *iso)
  
  static void
  reset_iso2022 (Lisp_Object coding_system, struct iso2022_decoder *iso)
@@ -3195,8 +4241,10 @@ reset_iso2022 (Lisp_Object coding_system, struct iso2022_decoder *iso)
    iso->invalid_switch_dir = 0;
    iso->output_direction_sequence = 0;
    iso->output_literally = 0;
    iso->invalid_switch_dir = 0;
    iso->output_direction_sequence = 0;
    iso->output_literally = 0;
+#ifdef ENABLE_COMPOSITE_CHARS
    if (iso->composite_chars)
      Dynarr_reset (iso->composite_chars);
    if (iso->composite_chars)
      Dynarr_reset (iso->composite_chars);
+#endif
  }
  
  static int
  }
  
  static int
@@ -3324,6 +4372,7 @@ parse_iso2022_esc (Lisp_Object codesys, struct iso2022_decoder *iso,
           reg = 3; half = 1;
           goto locking_shift;
  
           reg = 3; half = 1;
           goto locking_shift;
  
+#ifdef ENABLE_COMPOSITE_CHARS
           /**** composite ****/
  
         case '0':
           /**** composite ****/
  
         case '0':
@@ -3337,6 +4386,7 @@ parse_iso2022_esc (Lisp_Object codesys, struct iso2022_decoder *iso,
           *flags = (*flags & CODING_STATE_ISO2022_LOCK) &
             ~CODING_STATE_COMPOSITE;
           return 1;
           *flags = (*flags & CODING_STATE_ISO2022_LOCK) &
             ~CODING_STATE_COMPOSITE;
           return 1;
+#endif /* ENABLE_COMPOSITE_CHARS */
  
           /**** directionality ****/
  
  
           /**** directionality ****/
  
@@ -3593,11 +4643,15 @@ static int
  detect_coding_iso2022 (struct detection_state *st, CONST unsigned char *src,
                        unsigned int n)
  {
  detect_coding_iso2022 (struct detection_state *st, CONST unsigned char *src,
                        unsigned int n)
  {
-  int c;
    int mask;
  
    /* #### There are serious deficiencies in the recognition mechanism
    int mask;
  
    /* #### There are serious deficiencies in the recognition mechanism
-     here.  This needs to be much smarter if it's going to cut it. */
+     here.  This needs to be much smarter if it's going to cut it.
+     The sequence "\xff\x0f" is currently detected as LOCK_SHIFT while
+     it should be detected as Latin-1.
+     All the ISO2022 stuff in this file should be synced up with the
+     code from FSF Emacs-20.4, in which Mule should be more or less stable.
+     Perhaps we should wait till R2L works in FSF Emacs? */
  
    if (!st->iso2022.initted)
      {
  
    if (!st->iso2022.initted)
      {
@@ -3617,7 +4671,7 @@ detect_coding_iso2022 (struct detection_state *st, CONST unsigned char *src,
  
    while (n--)
      {
  
    while (n--)
      {
-      c = *src++;
+      int c = *src++;
        if (c >= 0xA0)
         {
           mask &= ~CODING_CATEGORY_ISO_7_MASK;
        if (c >= 0xA0)
         {
           mask &= ~CODING_CATEGORY_ISO_7_MASK;
@@ -3716,7 +4770,7 @@ postprocess_iso2022_mask (int mask)
     need to handle the CSI differently. */
  
  static void
     need to handle the CSI differently. */
  
  static void
-restore_left_to_right_direction (struct Lisp_Coding_System *codesys,
+restore_left_to_right_direction (Lisp_Coding_System *codesys,
                                  unsigned_char_dynarr *dst,
                                  unsigned int *flags,
                                  int internal_p)
                                  unsigned_char_dynarr *dst,
                                  unsigned int *flags,
                                  int internal_p)
@@ -3747,7 +4801,7 @@ restore_left_to_right_direction (struct Lisp_Coding_System *codesys,
     need to handle the CSI differently. */
  
  static void
     need to handle the CSI differently. */
  
  static void
-ensure_correct_direction (int direction, struct Lisp_Coding_System *codesys,
+ensure_correct_direction (int direction, Lisp_Coding_System *codesys,
                           unsigned_char_dynarr *dst, unsigned int *flags,
                           int internal_p)
  {
                           unsigned_char_dynarr *dst, unsigned int *flags,
                           int internal_p)
  {
@@ -3780,23 +4834,25 @@ static void
  decode_coding_iso2022 (Lstream *decoding, CONST unsigned char *src,
                        unsigned_char_dynarr *dst, unsigned int n)
  {
  decode_coding_iso2022 (Lstream *decoding, CONST unsigned char *src,
                        unsigned_char_dynarr *dst, unsigned int n)
  {
-  unsigned char c;
-  unsigned int flags, ch;
-  enum eol_type eol_type;
    struct decoding_stream *str = DECODING_STREAM_DATA (decoding);
    struct decoding_stream *str = DECODING_STREAM_DATA (decoding);
-  Lisp_Object coding_system;
+  unsigned int flags  = str->flags;
+  unsigned int ch     = str->ch;
+  eol_type_t eol_type = str->eol_type;
+#ifdef ENABLE_COMPOSITE_CHARS
    unsigned_char_dynarr *real_dst = dst;
    unsigned_char_dynarr *real_dst = dst;
+#endif
+  Lisp_Object coding_system;
  
  
-  CODING_STREAM_DECOMPOSE (str, flags, ch);
-  eol_type = str->eol_type;
    XSETCODING_SYSTEM (coding_system, str->codesys);
  
    XSETCODING_SYSTEM (coding_system, str->codesys);
  
+#ifdef ENABLE_COMPOSITE_CHARS
    if (flags & CODING_STATE_COMPOSITE)
      dst = str->iso2022.composite_chars;
    if (flags & CODING_STATE_COMPOSITE)
      dst = str->iso2022.composite_chars;
+#endif /* ENABLE_COMPOSITE_CHARS */
  
    while (n--)
      {
  
    while (n--)
      {
-      c = *src++;
+      unsigned char c = *src++;
        if (flags & CODING_STATE_ESCAPE)
         {       /* Within ESC sequence */
           int retval = parse_iso2022_esc (coding_system, &str->iso2022,
        if (flags & CODING_STATE_ESCAPE)
         {       /* Within ESC sequence */
           int retval = parse_iso2022_esc (coding_system, &str->iso2022,
@@ -3806,6 +4862,7 @@ decode_coding_iso2022 (Lstream *decoding, CONST unsigned char *src,
             {
               switch (str->iso2022.esc)
                 {
             {
               switch (str->iso2022.esc)
                 {
+#ifdef ENABLE_COMPOSITE_CHARS
                 case ISO_ESC_START_COMPOSITE:
                   if (str->iso2022.composite_chars)
                     Dynarr_reset (str->iso2022.composite_chars);
                 case ISO_ESC_START_COMPOSITE:
                   if (str->iso2022.composite_chars)
                     Dynarr_reset (str->iso2022.composite_chars);
@@ -3824,6 +4881,7 @@ decode_coding_iso2022 (Lstream *decoding, CONST unsigned char *src,
                     Dynarr_add_many (dst, comstr, len);
                     break;
                   }
                     Dynarr_add_many (dst, comstr, len);
                     break;
                   }
+#endif /* ENABLE_COMPOSITE_CHARS */
  
                 case ISO_ESC_LITERAL:
                   DECODE_ADD_BINARY_CHAR (c, dst);
  
                 case ISO_ESC_LITERAL:
                   DECODE_ADD_BINARY_CHAR (c, dst);
@@ -3898,7 +4956,9 @@ decode_coding_iso2022 (Lstream *decoding, CONST unsigned char *src,
        else
         {                       /* Graphic characters */
           Lisp_Object charset;
        else
         {                       /* Graphic characters */
           Lisp_Object charset;
-         int lb;
+#ifndef UTF2000
+         Charset_ID lb;
+#endif
           int reg;
  
           DECODE_HANDLE_EOL_TYPE (eol_type, c, flags, dst);
           int reg;
  
           DECODE_HANDLE_EOL_TYPE (eol_type, c, flags, dst);
@@ -3911,7 +4971,8 @@ decode_coding_iso2022 (Lstream *decoding, CONST unsigned char *src,
           charset = str->iso2022.charset[reg];
  
           /* Error checking: */
           charset = str->iso2022.charset[reg];
  
           /* Error checking: */
-         if (NILP (charset) || str->iso2022.invalid_designated[reg]
+         if (! CHARSETP (charset)
+             || str->iso2022.invalid_designated[reg]
               || (((c & 0x7F) == ' ' || (c & 0x7F) == ISO_CODE_DEL)
                   && XCHARSET_CHARS (charset) == 94))
             /* Mrmph.  We are trying to invoke a register that has no
               || (((c & 0x7F) == ' ' || (c & 0x7F) == ISO_CODE_DEL)
                   && XCHARSET_CHARS (charset) == 94))
             /* Mrmph.  We are trying to invoke a register that has no
@@ -3940,6 +5001,22 @@ decode_coding_iso2022 (Lstream *decoding, CONST unsigned char *src,
                     charset = new_charset;
                 }
  
                     charset = new_charset;
                 }
  
+#ifdef UTF2000
+             if (XCHARSET_DIMENSION (charset) == 1)
+               {
+                 DECODE_OUTPUT_PARTIAL_CHAR (ch);
+                 DECODE_ADD_UCS_CHAR
+                   (MAKE_CHAR (charset, c & 0x7F, 0), dst);
+               }
+             else if (ch)
+               {
+                 DECODE_ADD_UCS_CHAR
+                   (MAKE_CHAR (charset, ch & 0x7F, c & 0x7F), dst);
+                 ch = 0;
+               }
+             else
+               ch = c;
+#else
               lb = XCHARSET_LEADING_BYTE (charset);
               switch (XCHARSET_REP_BYTES (charset))
                 {
               lb = XCHARSET_LEADING_BYTE (charset);
               switch (XCHARSET_REP_BYTES (charset))
                 {
@@ -3988,6 +5065,7 @@ decode_coding_iso2022 (Lstream *decoding, CONST unsigned char *src,
                   else
                     ch = c;
                 }
                   else
                     ch = c;
                 }
+#endif
             }
  
           if (!ch)
             }
  
           if (!ch)
@@ -4000,7 +5078,8 @@ decode_coding_iso2022 (Lstream *decoding, CONST unsigned char *src,
    if (flags & CODING_STATE_END)
      DECODE_OUTPUT_PARTIAL_CHAR (ch);
  
    if (flags & CODING_STATE_END)
      DECODE_OUTPUT_PARTIAL_CHAR (ch);
  
-  CODING_STREAM_COMPOSE (str, flags, ch);
+  str->flags = flags;
+  str->ch    = ch;
  }
  
  
  }
  
  
@@ -4012,7 +5091,8 @@ static void
  iso2022_designate (Lisp_Object charset, unsigned char reg,
                    struct encoding_stream *str, unsigned_char_dynarr *dst)
  {
  iso2022_designate (Lisp_Object charset, unsigned char reg,
                    struct encoding_stream *str, unsigned_char_dynarr *dst)
  {
-  CONST char *inter94 = "()*+", *inter96= ",-./";
+  static CONST char inter94[] = "()*+";
+  static CONST char inter96[] = ",-./";
    unsigned int type;
    unsigned char final;
    Lisp_Object old_charset = str->iso2022.charset[reg];
    unsigned int type;
    unsigned char final;
    Lisp_Object old_charset = str->iso2022.charset[reg];
@@ -4100,28 +5180,244 @@ encode_coding_iso2022 (Lstream *encoding, CONST unsigned char *src,
                        unsigned_char_dynarr *dst, unsigned int n)
  {
    unsigned char charmask, c;
                        unsigned_char_dynarr *dst, unsigned int n)
  {
    unsigned char charmask, c;
-  unsigned int flags, ch;
-  enum eol_type eol_type;
    unsigned char char_boundary;
    struct encoding_stream *str = ENCODING_STREAM_DATA (encoding);
    unsigned char char_boundary;
    struct encoding_stream *str = ENCODING_STREAM_DATA (encoding);
-  struct Lisp_Coding_System *codesys = str->codesys;
+  unsigned int flags          = str->flags;
+  Emchar ch                   = str->ch;
+  Lisp_Coding_System *codesys = str->codesys;
+  eol_type_t eol_type         = CODING_SYSTEM_EOL_TYPE (str->codesys);
    int i;
    Lisp_Object charset;
    int half;
    int i;
    Lisp_Object charset;
    int half;
+#ifdef UTF2000
+  unsigned int byte1, byte2;
+#endif
  
  
+#ifdef ENABLE_COMPOSITE_CHARS
    /* flags for handling composite chars.  We do a little switcharoo
       on the source while we're outputting the composite char. */
    unsigned int saved_n = 0;
    CONST unsigned char *saved_src = NULL;
    int in_composite = 0;
    /* flags for handling composite chars.  We do a little switcharoo
       on the source while we're outputting the composite char. */
    unsigned int saved_n = 0;
    CONST unsigned char *saved_src = NULL;
    int in_composite = 0;
+#endif /* ENABLE_COMPOSITE_CHARS */
  
  
-  CODING_STREAM_DECOMPOSE (str, flags, ch);
-  eol_type = CODING_SYSTEM_EOL_TYPE (str->codesys);
    char_boundary = str->iso2022.current_char_boundary;
    charset = str->iso2022.current_charset;
    half = str->iso2022.current_half;
  
    char_boundary = str->iso2022.current_char_boundary;
    charset = str->iso2022.current_charset;
    half = str->iso2022.current_half;
  
+#ifdef ENABLE_COMPOSITE_CHARS
   back_to_square_n:
   back_to_square_n:
+#endif
+#ifdef UTF2000
+  while (n--)
+    {
+      c = *src++;
+
+      switch (char_boundary)
+       {
+       case 0:
+         if ( c >= 0xfc )
+           {
+             ch = c & 0x01;
+             char_boundary = 5;
+           }
+         else if ( c >= 0xf8 )
+           {
+             ch = c & 0x03;
+             char_boundary = 4;
+           }
+         else if ( c >= 0xf0 )
+           {
+             ch = c & 0x07;
+             char_boundary = 3;
+           }
+         else if ( c >= 0xe0 )
+           {
+             ch = c & 0x0f;
+             char_boundary = 2;
+           }
+         else if ( c >= 0xc0 )
+           {
+             ch = c & 0x1f;
+             char_boundary = 1;
+           }
+         else
+           {
+             ch = 0;
+
+             restore_left_to_right_direction (codesys, dst, &flags, 0);
+             
+             /* Make sure G0 contains ASCII */
+             if ((c > ' ' && c < ISO_CODE_DEL) ||
+                 !CODING_SYSTEM_ISO2022_NO_ASCII_CNTL (codesys))
+               {
+                 ensure_normal_shift (str, dst);
+                 iso2022_designate (Vcharset_ascii, 0, str, dst);
+               }
+             
+             /* If necessary, restore everything to the default state
+                at end-of-line */
+             if (c == '\n' &&
+                 !(CODING_SYSTEM_ISO2022_NO_ASCII_EOL (codesys)))
+               {
+                 restore_left_to_right_direction (codesys, dst, &flags, 0);
+
+                 ensure_normal_shift (str, dst);
+
+                 for (i = 0; i < 4; i++)
+                   {
+                     Lisp_Object initial_charset =
+                       CODING_SYSTEM_ISO2022_INITIAL_CHARSET (codesys, i);
+                     iso2022_designate (initial_charset, i, str, dst);
+                   }
+               }
+             if (c == '\n')
+               {
+                 if (eol_type != EOL_LF && eol_type != EOL_AUTODETECT)
+                   Dynarr_add (dst, '\r');
+                 if (eol_type != EOL_CR)
+                   Dynarr_add (dst, c);
+               }
+             else
+               {
+                 if (CODING_SYSTEM_ISO2022_ESCAPE_QUOTED (codesys)
+                     && fit_to_be_escape_quoted (c))
+                   Dynarr_add (dst, ISO_CODE_ESC);
+                 Dynarr_add (dst, c);
+               }
+             char_boundary = 0;
+           }
+         break;
+       case 1:
+         ch = ( ch << 6 ) | ( c & 0x3f );
+         
+         char_boundary = 0;
+         if ( (0x80 <= ch) && (ch <= 0x9f) )
+           {
+             charmask = (half == 0 ? 0x00 : 0x80);
+         
+             if (CODING_SYSTEM_ISO2022_ESCAPE_QUOTED (codesys)
+                 && fit_to_be_escape_quoted (ch))
+               Dynarr_add (dst, ISO_CODE_ESC);
+             /* you asked for it ... */
+             Dynarr_add (dst, ch);
+           }
+         else
+           {
+             int reg;
+
+             BREAKUP_CHAR (ch, charset, byte1, byte2);
+             ensure_correct_direction (XCHARSET_DIRECTION (charset),
+                                       codesys, dst, &flags, 0);
+
+             /* Now determine which register to use. */
+             reg = -1;
+             for (i = 0; i < 4; i++)
+               {
+                 if (EQ (charset, str->iso2022.charset[i]) ||
+                     EQ (charset,
+                         CODING_SYSTEM_ISO2022_INITIAL_CHARSET (codesys, i)))
+                   {
+                     reg = i;
+                     break;
+                   }
+               }
+             
+             if (reg == -1)
+               {
+                 if (XCHARSET_GRAPHIC (charset) != 0)
+                   {
+                     if (!NILP (str->iso2022.charset[1]) &&
+                         (!CODING_SYSTEM_ISO2022_SEVEN (codesys) ||
+                          CODING_SYSTEM_ISO2022_LOCK_SHIFT (codesys)))
+                       reg = 1;
+                     else if (!NILP (str->iso2022.charset[2]))
+                       reg = 2;
+                     else if (!NILP (str->iso2022.charset[3]))
+                       reg = 3;
+                     else
+                       reg = 0;
+                   }
+                 else
+                   reg = 0;
+               }
+             
+             iso2022_designate (charset, reg, str, dst);
+             
+             /* Now invoke that register. */
+             switch (reg)
+               {
+               case 0:
+                 ensure_normal_shift (str, dst);
+                 half = 0;
+                 break;
+                 
+               case 1:
+                 if (CODING_SYSTEM_ISO2022_SEVEN (codesys))
+                   {
+                     ensure_shift_out (str, dst);
+                     half = 0;
+                   }
+                 else
+                   half = 1;
+                 break;
+                 
+               case 2:
+                 if (CODING_SYSTEM_ISO2022_SEVEN (str->codesys))
+                   {
+                     Dynarr_add (dst, ISO_CODE_ESC);
+                     Dynarr_add (dst, 'N');
+                     half = 0;
+                   }
+                 else
+                   {
+                     Dynarr_add (dst, ISO_CODE_SS2);
+                     half = 1;
+                   }
+                 break;
+                 
+               case 3:
+                 if (CODING_SYSTEM_ISO2022_SEVEN (str->codesys))
+                   {
+                     Dynarr_add (dst, ISO_CODE_ESC);
+                     Dynarr_add (dst, 'O');
+                     half = 0;
+                   }
+                 else
+                   {
+                     Dynarr_add (dst, ISO_CODE_SS3);
+                     half = 1;
+                   }
+                 break;
+                 
+               default:
+                 abort ();
+               }
+             
+             charmask = (half == 0 ? 0x00 : 0x80);
+             
+             switch (XCHARSET_DIMENSION (charset))
+               {
+               case 1:
+                 Dynarr_add (dst, byte1 | charmask);
+                 break;
+               case 2:
+                 Dynarr_add (dst, byte1 | charmask);
+                 Dynarr_add (dst, byte2 | charmask);
+                 break;
+               default:
+                 abort ();
+               }
+           }
+         ch =0;
+         break;
+       default:
+         ch = ( ch << 6 ) | ( c & 0x3f );
+         char_boundary--;
+       }
+    }
+#else /* not UTF2000 */
+
    while (n--)
      {
        c = *src++;
    while (n--)
      {
        c = *src++;
@@ -4180,7 +5476,10 @@ encode_coding_iso2022 (Lstream *encoding, CONST unsigned char *src,
           if (LEADING_BYTE_PREFIX_P(c))
             ch = c;
           else if (!EQ (charset, Vcharset_control_1)
           if (LEADING_BYTE_PREFIX_P(c))
             ch = c;
           else if (!EQ (charset, Vcharset_control_1)
-                  && !EQ (charset, Vcharset_composite))
+#ifdef ENABLE_COMPOSITE_CHARS
+                  && !EQ (charset, Vcharset_composite)
+#endif
+                  )
             {
               int reg;
  
             {
               int reg;
  
@@ -4300,6 +5599,7 @@ encode_coding_iso2022 (Lstream *encoding, CONST unsigned char *src,
                     }
                   else if (ch)
                     {
                     }
                   else if (ch)
                     {
+#ifdef ENABLE_COMPOSITE_CHARS
                       if (EQ (charset, Vcharset_composite))
                         {
                           if (in_composite)
                       if (EQ (charset, Vcharset_composite))
                         {
                           if (in_composite)
@@ -4323,6 +5623,7 @@ encode_coding_iso2022 (Lstream *encoding, CONST unsigned char *src,
                             }
                         }
                       else
                             }
                         }
                       else
+#endif /* ENABLE_COMPOSITE_CHARS */
                         {
                           Dynarr_add (dst, ch & charmask);
                           Dynarr_add (dst, c & charmask);
                         {
                           Dynarr_add (dst, ch & charmask);
                           Dynarr_add (dst, c & charmask);
@@ -4354,7 +5655,9 @@ encode_coding_iso2022 (Lstream *encoding, CONST unsigned char *src,
             }
         }
      }
             }
         }
      }
+#endif /* not UTF2000 */
  
  
+#ifdef ENABLE_COMPOSITE_CHARS
    if (in_composite)
      {
        n = saved_n;
    if (in_composite)
      {
        n = saved_n;
@@ -4364,8 +5667,13 @@ encode_coding_iso2022 (Lstream *encoding, CONST unsigned char *src,
        Dynarr_add (dst, '1'); /* end composing */
        goto back_to_square_n; /* Wheeeeeeeee ..... */
      }
        Dynarr_add (dst, '1'); /* end composing */
        goto back_to_square_n; /* Wheeeeeeeee ..... */
      }
+#endif /* ENABLE_COMPOSITE_CHARS */
  
  
+#ifdef UTF2000
+  if ( (char_boundary == 0) && flags & CODING_STATE_END)
+#else
    if (char_boundary && flags & CODING_STATE_END)
    if (char_boundary && flags & CODING_STATE_END)
+#endif
      {
        restore_left_to_right_direction (codesys, dst, &flags, 0);
        ensure_normal_shift (str, dst);
      {
        restore_left_to_right_direction (codesys, dst, &flags, 0);
        ensure_normal_shift (str, dst);
@@ -4377,7 +5685,8 @@ encode_coding_iso2022 (Lstream *encoding, CONST unsigned char *src,
         }
      }
  
         }
      }
  
-  CODING_STREAM_COMPOSE (str, flags, ch);
+  str->flags = flags;
+  str->ch    = ch;
    str->iso2022.current_char_boundary = char_boundary;
    str->iso2022.current_charset = charset;
    str->iso2022.current_half = half;
    str->iso2022.current_char_boundary = char_boundary;
    str->iso2022.current_charset = charset;
    str->iso2022.current_half = half;
@@ -4398,12 +5707,10 @@ decode_coding_no_conversion (Lstream *decoding, CONST unsigned char *src,
                              unsigned_char_dynarr *dst, unsigned int n)
  {
    unsigned char c;
                              unsigned_char_dynarr *dst, unsigned int n)
  {
    unsigned char c;
-  unsigned int flags, ch;
-  enum eol_type eol_type;
    struct decoding_stream *str = DECODING_STREAM_DATA (decoding);
    struct decoding_stream *str = DECODING_STREAM_DATA (decoding);
-
-  CODING_STREAM_DECOMPOSE (str, flags, ch);
-  eol_type = str->eol_type;
+  unsigned int flags  = str->flags;
+  unsigned int ch     = str->ch;
+  eol_type_t eol_type = str->eol_type;
  
    while (n--)
      {
  
    while (n--)
      {
@@ -4416,7 +5723,8 @@ decode_coding_no_conversion (Lstream *decoding, CONST unsigned char *src,
  
    DECODE_HANDLE_END_OF_CONVERSION (flags, ch, dst);
  
  
    DECODE_HANDLE_END_OF_CONVERSION (flags, ch, dst);
  
-  CODING_STREAM_COMPOSE (str, flags, ch);
+  str->flags = flags;
+  str->ch    = ch;
  }
  
  static void
  }
  
  static void
@@ -4425,15 +5733,71 @@ encode_coding_no_conversion (Lstream *encoding, CONST unsigned char *src,
  {
    unsigned char c;
    struct encoding_stream *str = ENCODING_STREAM_DATA (encoding);
  {
    unsigned char c;
    struct encoding_stream *str = ENCODING_STREAM_DATA (encoding);
-  unsigned int flags, ch;
-  enum eol_type eol_type;
-
-  CODING_STREAM_DECOMPOSE (str, flags, ch);
-  eol_type = CODING_SYSTEM_EOL_TYPE (str->codesys);
+  unsigned int flags  = str->flags;
+  unsigned int ch     = str->ch;
+  eol_type_t eol_type = CODING_SYSTEM_EOL_TYPE (str->codesys);
+#ifdef UTF2000
+  unsigned char char_boundary = str->iso2022.current_char_boundary;
+#endif
  
    while (n--)
      {
  
    while (n--)
      {
-      c = *src++;
+      c = *src++;        
+#ifdef UTF2000
+      switch (char_boundary)
+       {
+       case 0:
+         if ( c >= 0xfc )
+           {
+             ch = c & 0x01;
+             char_boundary = 5;
+           }
+         else if ( c >= 0xf8 )
+           {
+             ch = c & 0x03;
+             char_boundary = 4;
+           }
+         else if ( c >= 0xf0 )
+           {
+             ch = c & 0x07;
+             char_boundary = 3;
+           }
+         else if ( c >= 0xe0 )
+           {
+             ch = c & 0x0f;
+             char_boundary = 2;
+           }
+         else if ( c >= 0xc0 )
+           {
+             ch = c & 0x1f;
+             char_boundary = 1;
+           }
+         else
+           {
+             ch = 0;
+
+             if (c == '\n')
+               {
+                 if (eol_type != EOL_LF && eol_type != EOL_AUTODETECT)
+                   Dynarr_add (dst, '\r');
+                 if (eol_type != EOL_CR)
+                   Dynarr_add (dst, c);
+               }
+             else
+               Dynarr_add (dst, c);
+             char_boundary = 0;
+           }
+         break;
+       case 1:
+         ch = ( ch << 6 ) | ( c & 0x3f );
+         Dynarr_add (dst, ch & 0xff);
+         char_boundary = 0;
+         break;
+       default:
+         ch = ( ch << 6 ) | ( c & 0x3f );
+         char_boundary--;
+       }
+#else /* not UTF2000 */
        if (c == '\n')
         {
           if (eol_type != EOL_LF && eol_type != EOL_AUTODETECT)
        if (c == '\n')
         {
           if (eol_type != EOL_LF && eol_type != EOL_AUTODETECT)
@@ -4469,9 +5833,14 @@ encode_coding_no_conversion (Lstream *encoding, CONST unsigned char *src,
              untranslatable character, so ignore it */
           ch = 0;
         }
              untranslatable character, so ignore it */
           ch = 0;
         }
+#endif /* not UTF2000 */
      }
  
      }
  
-  CODING_STREAM_COMPOSE (str, flags, ch);
+  str->flags = flags;
+  str->ch    = ch;
+#ifdef UTF2000
+  str->iso2022.current_char_boundary = char_boundary;
+#endif
  }
  
  \f
  }
  
  \f
@@ -4484,24 +5853,27 @@ static Bufbyte_dynarr *conversion_in_dynarr;
  
  /* Determine coding system from coding format */
  
  
  /* Determine coding system from coding format */
  
-#define FILE_NAME_CODING_SYSTEM                        \
- ((NILP (Vfile_name_coding_system) ||                  \
-   (EQ ((Vfile_name_coding_system), Qbinary))) ?       \
-  Qnil : Fget_coding_system (Vfile_name_coding_system))
-
  /* #### not correct for all values of `fmt'! */
  /* #### not correct for all values of `fmt'! */
+static Lisp_Object
+external_data_format_to_coding_system (enum external_data_format fmt)
+{
+  switch (fmt)
+    {
+    case FORMAT_FILENAME:
+    case FORMAT_TERMINAL:
+      if (EQ (Vfile_name_coding_system, Qnil) ||
+         EQ (Vfile_name_coding_system, Qbinary))
+       return Qnil;
+      else
+       return Fget_coding_system (Vfile_name_coding_system);
  #ifdef MULE
  #ifdef MULE
-#define FMT_CODING_SYSTEM(fmt)                                 \
- (((fmt) == FORMAT_FILENAME) ? FILE_NAME_CODING_SYSTEM     :   \
-  ((fmt) == FORMAT_CTEXT   ) ? Fget_coding_system (Qctext) :   \
-  ((fmt) == FORMAT_TERMINAL) ? FILE_NAME_CODING_SYSTEM     :   \
-  Qnil)
-#else
-#define FMT_CODING_SYSTEM(fmt)                                 \
- (((fmt) == FORMAT_FILENAME) ? FILE_NAME_CODING_SYSTEM     :   \
-  ((fmt) == FORMAT_TERMINAL) ? FILE_NAME_CODING_SYSTEM     :   \
-  Qnil)
+    case FORMAT_CTEXT:
+      return Fget_coding_system (Qctext);
  #endif
  #endif
+    default:
+      return Qnil;
+    }
+}
  
  Extbyte *
  convert_to_external_format (CONST Bufbyte *ptr,
  
  Extbyte *
  convert_to_external_format (CONST Bufbyte *ptr,
@@ -4509,7 +5881,7 @@ convert_to_external_format (CONST Bufbyte *ptr,
                             Extcount *len_out,
                             enum external_data_format fmt)
  {
                             Extcount *len_out,
                             enum external_data_format fmt)
  {
-  Lisp_Object coding_system = FMT_CODING_SYSTEM (fmt);
+  Lisp_Object coding_system = external_data_format_to_coding_system (fmt);
  
    if (!conversion_out_dynarr)
      conversion_out_dynarr = Dynarr_new (Extbyte);
  
    if (!conversion_out_dynarr)
      conversion_out_dynarr = Dynarr_new (Extbyte);
@@ -4522,12 +5894,17 @@ convert_to_external_format (CONST Bufbyte *ptr,
  
        for (; ptr < end;)
          {
  
        for (; ptr < end;)
          {
+#ifdef UTF2000
+          Bufbyte c =
+           (*ptr < 0xc0) ? *ptr :
+           ((*ptr & 0x1f) << 6) | (*(ptr+1) & 0x3f);
+#else
            Bufbyte c =
              (BYTE_ASCII_P (*ptr))                 ? *ptr :
              (*ptr == LEADING_BYTE_CONTROL_1)      ? (*(ptr+1) - 0x20) :
              (*ptr == LEADING_BYTE_LATIN_ISO8859_1) ? (*(ptr+1)) :
              '~';
            Bufbyte c =
              (BYTE_ASCII_P (*ptr))                 ? *ptr :
              (*ptr == LEADING_BYTE_CONTROL_1)      ? (*(ptr+1) - 0x20) :
              (*ptr == LEADING_BYTE_LATIN_ISO8859_1) ? (*(ptr+1)) :
              '~';
-
+#endif
            Dynarr_add (conversion_out_dynarr, (Extbyte) c);
            INC_CHARPTR (ptr);
          }
            Dynarr_add (conversion_out_dynarr, (Extbyte) c);
            INC_CHARPTR (ptr);
          }
@@ -4577,7 +5954,7 @@ convert_from_external_format (CONST Extbyte *ptr,
                               Bytecount *len_out,
                               enum external_data_format fmt)
  {
                               Bytecount *len_out,
                               enum external_data_format fmt)
  {
-  Lisp_Object coding_system = FMT_CODING_SYSTEM (fmt);
+  Lisp_Object coding_system = external_data_format_to_coding_system (fmt);
  
    if (!conversion_in_dynarr)
      conversion_in_dynarr = Dynarr_new (Bufbyte);
  
    if (!conversion_in_dynarr)
      conversion_in_dynarr = Dynarr_new (Bufbyte);
@@ -4634,7 +6011,7 @@ convert_from_external_format (CONST Extbyte *ptr,
  /************************************************************************/
  
  void
  /************************************************************************/
  
  void
-syms_of_mule_coding (void)
+syms_of_file_coding (void)
  {
    defsymbol (&Qbuffer_file_coding_system, "buffer-file-coding-system");
    deferror (&Qcoding_system_error, "coding-system-error",
  {
    defsymbol (&Qbuffer_file_coding_system, "buffer-file-coding-system");
    deferror (&Qcoding_system_error, "coding-system-error",
@@ -4647,6 +6024,7 @@ syms_of_mule_coding (void)
    DEFSUBR (Fcoding_system_name);
    DEFSUBR (Fmake_coding_system);
    DEFSUBR (Fcopy_coding_system);
    DEFSUBR (Fcoding_system_name);
    DEFSUBR (Fmake_coding_system);
    DEFSUBR (Fcopy_coding_system);
+  DEFSUBR (Fdefine_coding_system_alias);
    DEFSUBR (Fsubsidiary_coding_system);
  
    DEFSUBR (Fcoding_system_type);
    DEFSUBR (Fsubsidiary_coding_system);
  
    DEFSUBR (Fcoding_system_type);
@@ -4670,12 +6048,19 @@ syms_of_mule_coding (void)
    DEFSUBR (Fencode_shift_jis_char);
    DEFSUBR (Fdecode_big5_char);
    DEFSUBR (Fencode_big5_char);
    DEFSUBR (Fencode_shift_jis_char);
    DEFSUBR (Fdecode_big5_char);
    DEFSUBR (Fencode_big5_char);
+  DEFSUBR (Fset_ucs_char);
+  DEFSUBR (Fucs_char);
+  DEFSUBR (Fset_char_ucs);
+  DEFSUBR (Fchar_ucs);
  #endif /* MULE */
    defsymbol (&Qcoding_system_p, "coding-system-p");
    defsymbol (&Qno_conversion, "no-conversion");
  #endif /* MULE */
    defsymbol (&Qcoding_system_p, "coding-system-p");
    defsymbol (&Qno_conversion, "no-conversion");
+  defsymbol (&Qraw_text, "raw-text");
  #ifdef MULE
    defsymbol (&Qbig5, "big5");
    defsymbol (&Qshift_jis, "shift-jis");
  #ifdef MULE
    defsymbol (&Qbig5, "big5");
    defsymbol (&Qshift_jis, "shift-jis");
+  defsymbol (&Qucs4, "ucs-4");
+  defsymbol (&Qutf8, "utf-8");
    defsymbol (&Qccl, "ccl");
    defsymbol (&Qiso2022, "iso2022");
  #endif /* MULE */
    defsymbol (&Qccl, "ccl");
    defsymbol (&Qiso2022, "iso2022");
  #endif /* MULE */
@@ -4719,6 +6104,10 @@ syms_of_mule_coding (void)
              "shift-jis");
    defsymbol (&coding_category_symbol[CODING_CATEGORY_BIG5],
              "big5");
              "shift-jis");
    defsymbol (&coding_category_symbol[CODING_CATEGORY_BIG5],
              "big5");
+  defsymbol (&coding_category_symbol[CODING_CATEGORY_UCS4],
+            "ucs-4");
+  defsymbol (&coding_category_symbol[CODING_CATEGORY_UTF8],
+            "utf-8");
    defsymbol (&coding_category_symbol[CODING_CATEGORY_ISO_7],
              "iso-7");
    defsymbol (&coding_category_symbol[CODING_CATEGORY_ISO_8_DESIGNATE],
    defsymbol (&coding_category_symbol[CODING_CATEGORY_ISO_7],
              "iso-7");
    defsymbol (&coding_category_symbol[CODING_CATEGORY_ISO_8_DESIGNATE],
@@ -4735,7 +6124,7 @@ syms_of_mule_coding (void)
  }
  
  void
  }
  
  void
-lstream_type_create_mule_coding (void)
+lstream_type_create_file_coding (void)
  {
    LSTREAM_HAS_METHOD (decoding, reader);
    LSTREAM_HAS_METHOD (decoding, writer);
  {
    LSTREAM_HAS_METHOD (decoding, reader);
    LSTREAM_HAS_METHOD (decoding, writer);
@@ -4755,7 +6144,7 @@ lstream_type_create_mule_coding (void)
  }
  
  void
  }
  
  void
-vars_of_mule_coding (void)
+vars_of_file_coding (void)
  {
    int i;
  
  {
    int i;
  
@@ -4817,11 +6206,11 @@ Setting this to nil does not do anything.
  }
  
  void
  }
  
  void
-complex_vars_of_mule_coding (void)
+complex_vars_of_file_coding (void)
  {
  {
-  staticpro (&Vcoding_system_hashtable);
-  Vcoding_system_hashtable = make_lisp_hashtable (50, HASHTABLE_NONWEAK,
-                                                 HASHTABLE_EQ);
+  staticpro (&Vcoding_system_hash_table);
+  Vcoding_system_hash_table =
+    make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
  
    the_codesys_prop_dynarr = Dynarr_new (codesys_prop);
  
  
    the_codesys_prop_dynarr = Dynarr_new (codesys_prop);
  
@@ -4863,13 +6252,43 @@ complex_vars_of_mule_coding (void)
    DEFINE_CODESYS_PROP (CODESYS_PROP_CCL,     Qdecode);
  #endif /* MULE */
    /* Need to create this here or we're really screwed. */
    DEFINE_CODESYS_PROP (CODESYS_PROP_CCL,     Qdecode);
  #endif /* MULE */
    /* Need to create this here or we're really screwed. */
-  Fmake_coding_system (Qno_conversion, Qno_conversion, build_string ("No conversion"),
-                      list2 (Qmnemonic, build_string ("Noconv")));
+  Fmake_coding_system
+    (Qraw_text, Qno_conversion,
+     build_string ("Raw text, which means it converts only line-break-codes."),
+     list2 (Qmnemonic, build_string ("Raw")));
+
+  Fmake_coding_system
+    (Qbinary, Qno_conversion,
+     build_string ("Binary, which means it does not convert anything."),
+     list4 (Qeol_type, Qlf,
+           Qmnemonic, build_string ("Binary")));
+
+#ifdef UTF2000
+  Fmake_coding_system
+    (Qutf8, Qutf8,
+     build_string ("Coding-system of ISO/IEC 10646 UTF-8."),
+     list2 (Qmnemonic, build_string ("UTF8")));
+#endif
  
  
-  Fcopy_coding_system (Fcoding_system_property (Qno_conversion, Qeol_lf),
-                      Qbinary);
+  Fdefine_coding_system_alias (Qno_conversion, Qraw_text);
  
    /* Need this for bootstrapping */
    coding_category_system[CODING_CATEGORY_NO_CONVERSION] =
  
    /* Need this for bootstrapping */
    coding_category_system[CODING_CATEGORY_NO_CONVERSION] =
-    Fget_coding_system (Qno_conversion);
+    Fget_coding_system (Qraw_text);
+
+#ifdef UTF2000
+  coding_category_system[CODING_CATEGORY_UTF8]
+   = Fget_coding_system (Qutf8);
+#endif
+
+#ifdef MULE
+  {
+    unsigned int i;
+
+    for (i = 0; i < 65536; i++)
+      ucs_to_mule_table[i] = Qnil;
+  }
+  staticpro (&mule_to_ucs_table);
+  mule_to_ucs_table = Fmake_char_table(Qgeneric);
+#endif /* MULE */
  }
  }