Merge CJK Unified Ideographs Extension A.

[chise/xemacs-chise.git-] / src / file-coding.c
diff --git a/src/file-coding.c b/src/file-coding.c

index 48363a4..3068c89 100644 (file)
--- a/src/file-coding.c
+++ b/src/file-coding.c
@@ -25,12 +25,14 @@ Boston, MA 02111-1307, USA.  */
  
  #include <config.h>
  #include "lisp.h"
+
  #include "buffer.h"
  #include "elhash.h"
  #include "insdel.h"
  #include "lstream.h"
  #ifdef MULE
  #include "mule-ccl.h"
+#include "chartab.h"
  #endif
  #include "file-coding.h"
  
@@ -54,7 +56,7 @@ int coding_category_by_priority[CODING_CATEGORY_LAST + 1];
  
  Lisp_Object Qcoding_system_p;
  
-Lisp_Object Qno_conversion, Qccl, Qiso2022;
+Lisp_Object Qraw_text, Qno_conversion, Qccl, Qiso2022;
  /* Qinternal in general.c */
  
  Lisp_Object Qmnemonic, Qeol_type;
@@ -64,6 +66,7 @@ Lisp_Object Qpost_read_conversion;
  Lisp_Object Qpre_write_conversion;
  
  #ifdef MULE
+Lisp_Object Qucs4, Qutf8;
  Lisp_Object Qbig5, Qshift_jis;
  Lisp_Object Qcharset_g0, Qcharset_g1, Qcharset_g2, Qcharset_g3;
  Lisp_Object Qforce_g0_on_output, Qforce_g1_on_output;
@@ -75,7 +78,7 @@ Lisp_Object Qshort, Qno_ascii_eol, Qno_ascii_cntl, Qseven, Qlock_shift;
  #endif
  Lisp_Object Qencode, Qdecode;
  
-Lisp_Object Vcoding_system_hashtable;
+Lisp_Object Vcoding_system_hash_table;
  
  int enable_multibyte_characters;
  
@@ -103,8 +106,10 @@ struct iso2022_decoder
    /* Index for next byte to store in ISO escape sequence. */
    int esc_bytes_index;
  
+#ifdef ENABLE_COMPOSITE_CHARS
    /* Stuff seen so far when composing a string. */
    unsigned_char_dynarr *composite_chars;
+#endif
  
    /* If we saw an invalid designation sequence for a particular
       register, we flag it here and switch to ASCII.  The next time we
@@ -166,6 +171,24 @@ static void decode_coding_big5 (Lstream *decoding,
  static void encode_coding_big5 (Lstream *encoding,
                                 CONST unsigned char *src,
                                 unsigned_char_dynarr *dst, unsigned int n);
+static int detect_coding_ucs4 (struct detection_state *st,
+                              CONST unsigned char *src,
+                              unsigned int n);
+static void decode_coding_ucs4 (Lstream *decoding,
+                               CONST unsigned char *src,
+                               unsigned_char_dynarr *dst, unsigned int n);
+static void encode_coding_ucs4 (Lstream *encoding,
+                               CONST unsigned char *src,
+                               unsigned_char_dynarr *dst, unsigned int n);
+static int detect_coding_utf8 (struct detection_state *st,
+                              CONST unsigned char *src,
+                              unsigned int n);
+static void decode_coding_utf8 (Lstream *decoding,
+                               CONST unsigned char *src,
+                               unsigned_char_dynarr *dst, unsigned int n);
+static void encode_coding_utf8 (Lstream *encoding,
+                               CONST unsigned char *src,
+                               unsigned_char_dynarr *dst, unsigned int n);
  static int postprocess_iso2022_mask (int mask);
  static void reset_iso2022 (Lisp_Object coding_system,
                            struct iso2022_decoder *iso);
@@ -222,22 +245,58 @@ static Lisp_Object mark_coding_system (Lisp_Object, void (*) (Lisp_Object));
  static void print_coding_system (Lisp_Object, Lisp_Object, int);
  static void finalize_coding_system (void *header, int for_disksave);
  
+#ifdef MULE
+static const struct lrecord_description ccs_description_1[] = { /* field table for one charset_conversion_spec */
+  { XD_LISP_OBJECT, offsetof(charset_conversion_spec, from_charset), 2 }, /* NOTE(review): the 2 presumably spans from_charset and to_charset, the two fields mark_coding_system marks per spec -- confirm */
+  { XD_END }
+};
+
+static const struct struct_description ccs_description = { /* pairs the struct's size with its field table */
+  sizeof(charset_conversion_spec),
+  ccs_description_1
+};
+  
+static const struct lrecord_description ccsd_description_1[] = { /* field table for the dynarr of conversion specs */
+  XD_DYNARR_DESC(charset_conversion_spec_dynarr, &ccs_description), /* presumably describes each element via ccs_description -- confirm against XD_DYNARR_DESC */
+  { XD_END }
+};
+
+static const struct struct_description ccsd_description = { /* referenced by the iso2022.input_conv / output_conv entries of coding_system_description */
+  sizeof(charset_conversion_spec_dynarr),
+  ccsd_description_1
+};
+#endif /* MULE */
+
+static const struct lrecord_description coding_system_description[] = { /* field table passed to DEFINE_LRECORD_IMPLEMENTATION for coding-system objects */
+  { XD_LISP_OBJECT, offsetof(struct Lisp_Coding_System, name), 2 }, /* NOTE(review): trailing numbers look like counts of consecutive Lisp_Object fields; 2+3+3 equals the eight fields mark_coding_system always marks -- confirm against the struct layout */
+  { XD_LISP_OBJECT, offsetof(struct Lisp_Coding_System, mnemonic), 3 },
+  { XD_LISP_OBJECT, offsetof(struct Lisp_Coding_System, eol_lf), 3 },
+#ifdef MULE
+  { XD_LISP_OBJECT, offsetof(struct Lisp_Coding_System, iso2022.initial_charset), 4 }, /* mark_coding_system likewise walks initial charsets 0..3 */
+  { XD_STRUCT_PTR,  offsetof(struct Lisp_Coding_System, iso2022.input_conv),  1, &ccsd_description }, /* pointer to a conversion-spec dynarr; mark_coding_system checks it for null before use */
+  { XD_STRUCT_PTR,  offsetof(struct Lisp_Coding_System, iso2022.output_conv), 1, &ccsd_description }, /* same shape as input_conv */
+  { XD_LISP_OBJECT, offsetof(struct Lisp_Coding_System, ccl.decode), 2 }, /* presumably ccl.decode and ccl.encode -- confirm */
+#endif /* MULE */
+  { XD_END }
+};
+
  DEFINE_LRECORD_IMPLEMENTATION ("coding-system", coding_system,
                                mark_coding_system, print_coding_system,
                                finalize_coding_system,
-                              0, 0, struct Lisp_Coding_System);
+                              0, 0, coding_system_description,
+                              struct Lisp_Coding_System);
  
  static Lisp_Object
  mark_coding_system (Lisp_Object obj, void (*markobj) (Lisp_Object))
  {
-  struct Lisp_Coding_System *codesys = XCODING_SYSTEM (obj);
+  Lisp_Coding_System *codesys = XCODING_SYSTEM (obj);
  
-  (markobj) (CODING_SYSTEM_NAME (codesys));
-  (markobj) (CODING_SYSTEM_DOC_STRING (codesys));
-  (markobj) (CODING_SYSTEM_MNEMONIC (codesys));
-  (markobj) (CODING_SYSTEM_EOL_LF (codesys));
-  (markobj) (CODING_SYSTEM_EOL_CRLF (codesys));
-  (markobj) (CODING_SYSTEM_EOL_CR (codesys));
+  markobj (CODING_SYSTEM_NAME (codesys));
+  markobj (CODING_SYSTEM_DOC_STRING (codesys));
+  markobj (CODING_SYSTEM_MNEMONIC (codesys));
+  markobj (CODING_SYSTEM_EOL_LF (codesys));
+  markobj (CODING_SYSTEM_EOL_CRLF (codesys));
+  markobj (CODING_SYSTEM_EOL_CR (codesys));
  
    switch (CODING_SYSTEM_TYPE (codesys))
      {
@@ -245,15 +304,15 @@ mark_coding_system (Lisp_Object obj, void (*markobj) (Lisp_Object))
        int i;
      case CODESYS_ISO2022:
        for (i = 0; i < 4; i++)
-       (markobj) (CODING_SYSTEM_ISO2022_INITIAL_CHARSET (codesys, i));
+       markobj (CODING_SYSTEM_ISO2022_INITIAL_CHARSET (codesys, i));
        if (codesys->iso2022.input_conv)
         {
           for (i = 0; i < Dynarr_length (codesys->iso2022.input_conv); i++)
             {
               struct charset_conversion_spec *ccs =
                 Dynarr_atp (codesys->iso2022.input_conv, i);
-             (markobj) (ccs->from_charset);
-             (markobj) (ccs->to_charset);
+             markobj (ccs->from_charset);
+             markobj (ccs->to_charset);
             }
         }
        if (codesys->iso2022.output_conv)
@@ -262,22 +321,22 @@ mark_coding_system (Lisp_Object obj, void (*markobj) (Lisp_Object))
             {
               struct charset_conversion_spec *ccs =
                 Dynarr_atp (codesys->iso2022.output_conv, i);
-             (markobj) (ccs->from_charset);
-             (markobj) (ccs->to_charset);
+             markobj (ccs->from_charset);
+             markobj (ccs->to_charset);
             }
         }
        break;
  
      case CODESYS_CCL:
-      (markobj) (CODING_SYSTEM_CCL_DECODE (codesys));
-      (markobj) (CODING_SYSTEM_CCL_ENCODE (codesys));
+      markobj (CODING_SYSTEM_CCL_DECODE (codesys));
+      markobj (CODING_SYSTEM_CCL_ENCODE (codesys));
        break;
  #endif /* MULE */
      default:
        break;
      }
  
-  (markobj) (CODING_SYSTEM_PRE_WRITE_CONVERSION (codesys));
+  markobj (CODING_SYSTEM_PRE_WRITE_CONVERSION (codesys));
    return CODING_SYSTEM_POST_READ_CONVERSION (codesys);
  }
  
@@ -285,7 +344,7 @@ static void
  print_coding_system (Lisp_Object obj, Lisp_Object printcharfun,
                      int escapeflag)
  {
-  struct Lisp_Coding_System *c = XCODING_SYSTEM (obj);
+  Lisp_Coding_System *c = XCODING_SYSTEM (obj);
    if (print_readably)
      error ("printing unreadable object #<coding_system 0x%x>",
            c->header.uid);
@@ -298,7 +357,7 @@ print_coding_system (Lisp_Object obj, Lisp_Object printcharfun,
  static void
  finalize_coding_system (void *header, int for_disksave)
  {
-  struct Lisp_Coding_System *c = (struct Lisp_Coding_System *) header;
+  Lisp_Coding_System *c = (Lisp_Coding_System *) header;
    /* Since coding systems never go away, this function is not
       necessary.  But it would be necessary if we changed things
       so that coding systems could go away. */
@@ -344,16 +403,16 @@ eol_type_to_symbol (enum eol_type type)
  {
    switch (type)
      {
+    default: abort ();
      case EOL_LF:         return Qlf;
      case EOL_CRLF:       return Qcrlf;
      case EOL_CR:         return Qcr;
      case EOL_AUTODETECT: return Qnil;
-    default:             abort (); return Qnil; /* not reached */
      }
  }
  
  static void
-setup_eol_coding_systems (struct Lisp_Coding_System *codesys)
+setup_eol_coding_systems (Lisp_Coding_System *codesys)
  {
    Lisp_Object codesys_obj;
    int len = string_length (XSYMBOL (CODING_SYSTEM_NAME (codesys))->name);
@@ -439,7 +498,7 @@ associated coding system object is returned.
    else
      CHECK_SYMBOL (coding_system_or_name);
  
-  return Fgethash (coding_system_or_name, Vcoding_system_hashtable, Qnil);
+  return Fgethash (coding_system_or_name, Vcoding_system_hash_table, Qnil);
  }
  
  DEFUN ("get-coding-system", Fget_coding_system, 1, 1, 0, /*
@@ -465,19 +524,15 @@ struct coding_system_list_closure
  };
  
  static int
-add_coding_system_to_list_mapper (CONST void *hash_key, void *hash_contents,
+add_coding_system_to_list_mapper (Lisp_Object key, Lisp_Object value, /* KEY is not used by the new body; VALUE is read as a coding system */
                                    void *coding_system_list_closure)
  {
    /* This function can GC */
-  Lisp_Object key, contents;
-  Lisp_Object *coding_system_list;
    struct coding_system_list_closure *cscl =
      (struct coding_system_list_closure *) coding_system_list_closure;
-  CVOID_TO_LISP (key, hash_key);
-  VOID_TO_LISP (contents, hash_contents);
-  coding_system_list = cscl->coding_system_list;
+  Lisp_Object *coding_system_list = cscl->coding_system_list; /* list head supplied by the caller through the closure */
  
-  *coding_system_list = Fcons (XCODING_SYSTEM (contents)->name,
+  *coding_system_list = Fcons (XCODING_SYSTEM (value)->name, /* prepend this coding system's name to the list */
                                 *coding_system_list);
    return 0;
  }
@@ -493,7 +548,7 @@ Return a list of the names of all defined coding systems.
  
    GCPRO1 (coding_system_list);
    coding_system_list_closure.coding_system_list = &coding_system_list;
-  elisp_maphash (add_coding_system_to_list_mapper, Vcoding_system_hashtable,
+  elisp_maphash (add_coding_system_to_list_mapper, Vcoding_system_hash_table,
                  &coding_system_list_closure);
    UNGCPRO;
  
@@ -509,11 +564,11 @@ Return the name of the given coding system.
    return XCODING_SYSTEM_NAME (coding_system);
  }
  
-static struct Lisp_Coding_System *
+static Lisp_Coding_System *
  allocate_coding_system (enum coding_system_type type, Lisp_Object name)
  {
-  struct Lisp_Coding_System *codesys =
-    alloc_lcrecord_type (struct Lisp_Coding_System, lrecord_coding_system);
+  Lisp_Coding_System *codesys =
+    alloc_lcrecord_type (Lisp_Coding_System, &lrecord_coding_system);
  
    zero_lcrecord (codesys);
    CODING_SYSTEM_PRE_WRITE_CONVERSION (codesys) = Qnil;
@@ -612,6 +667,10 @@ nil or 'undecided
       characters will only be present if you explicitly insert them.)
  'shift-jis
       Shift-JIS (a Japanese encoding commonly used in PC operating systems).
+'ucs-4
+     ISO 10646 UCS-4 encoding.
+'utf-8
+     ISO 10646 UTF-8 encoding.
  'iso2022
       Any ISO2022-compliant encoding.  Among other things, this includes
       JIS (the Japanese encoding commonly used for e-mail), EUC (the
@@ -766,7 +825,7 @@ if TYPE is 'ccl:
  */
         (name, type, doc_string, props))
  {
-  struct Lisp_Coding_System *codesys;
+  Lisp_Coding_System *codesys;
    Lisp_Object rest, key, value;
    enum coding_system_type ty;
    int need_to_setup_eol_systems = 1;
@@ -778,6 +837,8 @@ if TYPE is 'ccl:
    else if (EQ (type, Qshift_jis))     { ty = CODESYS_SHIFT_JIS; }
    else if (EQ (type, Qiso2022))       { ty = CODESYS_ISO2022; }
    else if (EQ (type, Qbig5))          { ty = CODESYS_BIG5; }
+  else if (EQ (type, Qucs4))          { ty = CODESYS_UCS4; }
+  else if (EQ (type, Qutf8))          { ty = CODESYS_UTF8; }
    else if (EQ (type, Qccl))           { ty = CODESYS_CCL; }
  #endif
    else if (EQ (type, Qno_conversion)) { ty = CODESYS_NO_CONVERSION; }
@@ -890,7 +951,7 @@ if TYPE is 'ccl:
    {
      Lisp_Object codesys_obj;
      XSETCODING_SYSTEM (codesys_obj, codesys);
-    Fputhash (name, codesys_obj, Vcoding_system_hashtable);
+    Fputhash (name, codesys_obj, Vcoding_system_hash_table);
      return codesys_obj;
    }
  }
@@ -911,12 +972,12 @@ be created.
                          allocate_coding_system
                          (XCODING_SYSTEM_TYPE (old_coding_system),
                           new_name));
-      Fputhash (new_name, new_coding_system, Vcoding_system_hashtable);
+      Fputhash (new_name, new_coding_system, Vcoding_system_hash_table);
      }
  
    {
-    struct Lisp_Coding_System *to = XCODING_SYSTEM (new_coding_system);
-    struct Lisp_Coding_System *from = XCODING_SYSTEM (old_coding_system);
+    Lisp_Coding_System *to = XCODING_SYSTEM (new_coding_system);
+    Lisp_Coding_System *from = XCODING_SYSTEM (old_coding_system);
      memcpy (((char *) to  ) + sizeof (to->header),
             ((char *) from) + sizeof (from->header),
             sizeof (*from) - sizeof (from->header));
@@ -925,10 +986,44 @@ be created.
    return new_coding_system;
  }
  
+DEFUN ("define-coding-system-alias", Fdefine_coding_system_alias, 2, 2, 0, /*
+Define symbol ALIAS as an alias for coding system CODING-SYSTEM.
+*/
+       (alias, coding_system))
+{
+  CHECK_SYMBOL (alias); /* ALIAS must be a symbol */
+  if (!NILP (Ffind_coding_system (alias))) /* refuse to redefine a name that already resolves to a coding system */
+    signal_simple_error ("Symbol already names a coding system", alias);
+  coding_system = Fget_coding_system (coding_system); /* from here on work with whatever Fget_coding_system returned for the argument */
+  Fputhash (alias, coding_system, Vcoding_system_hash_table); /* the alias is simply a second key for the same object */
+
+  /* Set up aliases for subsidiaries.  NOTE(review): ALIAS is already in the table here, so an error signalled by a recursive call below leaves it defined. */
+  if (XCODING_SYSTEM_EOL_TYPE (coding_system) == EOL_AUTODETECT)
+    {
+      Lisp_Object str;
+      XSETSTRING (str, symbol_name (XSYMBOL (alias))); /* ALIAS's name, used as the prefix of the subsidiary alias names */
+#define FROB(type, name)                                                       \
+      do {                                                                     \
+       Lisp_Object subsidiary = XCODING_SYSTEM_EOL_##type (coding_system);     \
+       if (!NILP (subsidiary))                                                 \
+         Fdefine_coding_system_alias                                           \
+           (Fintern (concat2 (str, build_string (name)), Qnil), subsidiary);   \
+      } while (0)
+      FROB (LF,   "-unix"); /* each call aliases ALIAS<suffix> to the matching non-nil subsidiary */
+      FROB (CRLF, "-dos");
+      FROB (CR,   "-mac");
+#undef FROB
+    }
+  /* FSF return value is a vector of [ALIAS-unix ALIAS-dos ALIAS-mac],
+     but it doesn't look intentional, so I'd rather return something
+     meaningful or nothing at all. */
+  return Qnil;
+}
+
  static Lisp_Object
  subsidiary_coding_system (Lisp_Object coding_system, enum eol_type type)
  {
-  struct Lisp_Coding_System *cs = XCODING_SYSTEM (coding_system);
+  Lisp_Coding_System *cs = XCODING_SYSTEM (coding_system);
    Lisp_Object new_coding_system;
  
    if (CODING_SYSTEM_EOL_TYPE (cs) != EOL_AUTODETECT)
@@ -978,22 +1073,21 @@ Return the type of CODING-SYSTEM.
  {
    switch (XCODING_SYSTEM_TYPE (Fget_coding_system (coding_system)))
      {
+    default: abort ();
      case CODESYS_AUTODETECT:   return Qundecided;
  #ifdef MULE
      case CODESYS_SHIFT_JIS:    return Qshift_jis;
      case CODESYS_ISO2022:      return Qiso2022;
      case CODESYS_BIG5:         return Qbig5;
+    case CODESYS_UCS4:         return Qucs4;
+    case CODESYS_UTF8:         return Qutf8;
      case CODESYS_CCL:          return Qccl;
  #endif
      case CODESYS_NO_CONVERSION:        return Qno_conversion;
  #ifdef DEBUG_XEMACS
      case CODESYS_INTERNAL:     return Qinternal;
  #endif
-    default:
-      abort ();
      }
-
-  return Qnil; /* not reached */
  }
  
  #ifdef MULE
@@ -1289,6 +1383,20 @@ struct detection_state
    struct
      {
        int mask;
+      int in_byte;
+  }
+  ucs4;
+
+  struct
+    {
+      int mask;
+      int in_byte;
+    }
+  utf8;
+
+  struct
+    {
+      int mask;
        int initted;
        struct iso2022_decoder iso;
        unsigned int flags;
@@ -1405,6 +1513,8 @@ detect_coding_type (struct detection_state *st, CONST unsigned char *src,
  #ifdef MULE
               st->shift_jis.mask = ~0;
               st->big5.mask = ~0;
+             st->ucs4.mask = ~0;
+             st->utf8.mask = ~0;
               st->iso2022.mask = ~0;
  #endif
               break;
@@ -1421,8 +1531,14 @@ detect_coding_type (struct detection_state *st, CONST unsigned char *src,
      st->shift_jis.mask = detect_coding_sjis (st, src, n);
    if (!mask_has_at_most_one_bit_p (st->big5.mask))
      st->big5.mask = detect_coding_big5 (st, src, n);
-
-  st->mask = st->iso2022.mask | st->shift_jis.mask | st->big5.mask;
+  if (!mask_has_at_most_one_bit_p (st->utf8.mask))
+    st->utf8.mask = detect_coding_utf8 (st, src, n);
+  if (!mask_has_at_most_one_bit_p (st->ucs4.mask))
+    st->ucs4.mask = detect_coding_ucs4 (st, src, n);
+
+  st->mask
+    = st->iso2022.mask | st->shift_jis.mask | st->big5.mask
+    | st->utf8.mask | st->ucs4.mask;
  #endif
    {
      int retval = mask_has_at_most_one_bit_p (st->mask);
@@ -1452,7 +1568,7 @@ coding_system_from_mask (int mask)
             }
         }
        if (NILP (retval))
-       retval = Fget_coding_system (Qno_conversion);
+       retval = Fget_coding_system (Qraw_text);
        return retval;
      }
    else
@@ -1474,7 +1590,7 @@ coding_system_from_mask (int mask)
        if (cat >= 0)
         return coding_category_system[cat];
        else
-       return Fget_coding_system (Qno_conversion);
+       return Fget_coding_system (Qraw_text);
      }
  }
  
@@ -1504,26 +1620,65 @@ determine_real_coding_system (Lstream *stream, Lisp_Object *codesys_in_out,
    if (XCODING_SYSTEM_TYPE (*codesys_in_out) == CODESYS_AUTODETECT ||
        *eol_type_in_out == EOL_AUTODETECT)
      {
+      unsigned char random_buffer[4096];
+      int nread;
+      Lisp_Object coding_system = Qnil;
  
-      while (1)
+      nread = Lstream_read (stream, random_buffer, sizeof (random_buffer));
+      if (nread)
         {
-         unsigned char random_buffer[4096];
-         int nread;
+         unsigned char *cp = random_buffer;
  
-         nread = Lstream_read (stream, random_buffer, sizeof (random_buffer));
-         if (!nread)
-           break;
-         if (detect_coding_type (&decst, random_buffer, nread,
-                                 XCODING_SYSTEM_TYPE (*codesys_in_out) !=
-                                 CODESYS_AUTODETECT))
-           break;
-       }
+         while (cp < random_buffer + nread)
+           {
+             if ((*cp++ == 'c') && (cp < random_buffer + nread) &&
+                 (*cp++ == 'o') && (cp < random_buffer + nread) &&
+                 (*cp++ == 'd') && (cp < random_buffer + nread) &&
+                 (*cp++ == 'i') && (cp < random_buffer + nread) &&
+                 (*cp++ == 'n') && (cp < random_buffer + nread) &&
+                 (*cp++ == 'g') && (cp < random_buffer + nread) &&
+                 (*cp++ == ':') && (cp < random_buffer + nread))
+               {
+                 unsigned char coding_system_name[4096 - 6];
+                 unsigned char *np = coding_system_name;
  
+                 while ( (cp < random_buffer + nread)
+                         && ((*cp == ' ') || (*cp == '\t')) )
+                   {
+                     cp++;
+                   }
+                 while ( (cp < random_buffer + nread) &&
+                         (*cp != ' ') && (*cp != '\t') && (*cp != ';') )
+                   {
+                     *np++ = *cp++;
+                   }
+                 *np = 0;
+                 coding_system
+                   = Ffind_coding_system (intern (coding_system_name));
+                 break;
+               }
+           }
+         if (EQ(coding_system, Qnil))
+           do{
+             if (detect_coding_type (&decst, random_buffer, nread,
+                                     XCODING_SYSTEM_TYPE (*codesys_in_out)
+                                     != CODESYS_AUTODETECT))
+               break;
+             nread = Lstream_read (stream,
+                                   random_buffer, sizeof (random_buffer));
+             if (!nread)
+               break;
+           } while(1);
+       }
        *eol_type_in_out = decst.eol_type;
        if (XCODING_SYSTEM_TYPE (*codesys_in_out) == CODESYS_AUTODETECT)
-       *codesys_in_out = coding_system_from_mask (decst.mask);
+       {
+         if (EQ(coding_system, Qnil))
+           *codesys_in_out = coding_system_from_mask (decst.mask);
+         else
+           *codesys_in_out = coding_system;
+       }
      }
-
    /* If we absolutely can't determine the EOL type, just assume LF. */
    if (*eol_type_in_out == EOL_AUTODETECT)
      *eol_type_in_out = EOL_LF;
@@ -1646,6 +1801,62 @@ do {                                                             \
  /* C should be a binary character in the range 0 - 255; convert
     to internal format and add to Dynarr DST. */
  
+#ifdef UTF2000
+#define DECODE_ADD_BINARY_CHAR(c, dst) \
+do { /* NOTE(review): c is expanded unparenthesized and more than once -- pass a plain, side-effect-free variable */ \
+  if (BYTE_ASCII_P (c))                                \
+    Dynarr_add (dst, c);                       \
+  else                                         \
+    { /* otherwise emit two bytes: 0xc0 | top bits, then 0x80 | low six bits */ \
+      Dynarr_add (dst, (c >> 6) | 0xc0);       \
+      Dynarr_add (dst, (c & 0x3f) | 0x80);     \
+    }                                          \
+} while (0)
+
+INLINE void
+DECODE_ADD_UCS_CHAR(Emchar c, unsigned_char_dynarr* dst) /* append C to DST as a 1- to 6-byte sequence in the UTF-8 bit layout */
+{
+  if ( c <= 0x7f ) /* fits in 7 bits: stored as a single byte */
+    {
+      Dynarr_add (dst, c);
+    }
+  else if ( c <= 0x7ff ) /* up to 11 bits: lead byte 0xc0 + 1 continuation byte */
+    {
+      Dynarr_add (dst, (c >> 6) | 0xc0);
+      Dynarr_add (dst, (c & 0x3f) | 0x80);
+    }
+  else if ( c <= 0xffff ) /* up to 16 bits: lead byte 0xe0 + 2 continuation bytes */
+    {
+      Dynarr_add (dst,  (c >> 12) | 0xe0);
+      Dynarr_add (dst, ((c >>  6) & 0x3f) | 0x80);
+      Dynarr_add (dst,  (c        & 0x3f) | 0x80);
+    }
+  else if ( c <= 0x1fffff ) /* up to 21 bits: lead byte 0xf0 + 3 continuation bytes */
+    {
+      Dynarr_add (dst,  (c >> 18) | 0xf0);
+      Dynarr_add (dst, ((c >> 12) & 0x3f) | 0x80);
+      Dynarr_add (dst, ((c >>  6) & 0x3f) | 0x80);
+      Dynarr_add (dst,  (c        & 0x3f) | 0x80);
+    }
+  else if ( c <= 0x3ffffff ) /* up to 26 bits: lead byte 0xf8 + 4 continuation bytes */
+    {
+      Dynarr_add (dst,  (c >> 24) | 0xf8);
+      Dynarr_add (dst, ((c >> 18) & 0x3f) | 0x80);
+      Dynarr_add (dst, ((c >> 12) & 0x3f) | 0x80);
+      Dynarr_add (dst, ((c >>  6) & 0x3f) | 0x80);
+      Dynarr_add (dst,  (c        & 0x3f) | 0x80);
+    }
+  else /* everything larger: lead byte 0xfc + 5 continuation bytes; NOTE(review): assumes c is non-negative -- Emchar's signedness is not visible here */
+    {
+      Dynarr_add (dst,  (c >> 30) | 0xfc);
+      Dynarr_add (dst, ((c >> 24) & 0x3f) | 0x80);
+      Dynarr_add (dst, ((c >> 18) & 0x3f) | 0x80);
+      Dynarr_add (dst, ((c >> 12) & 0x3f) | 0x80);
+      Dynarr_add (dst, ((c >>  6) & 0x3f) | 0x80);
+      Dynarr_add (dst,  (c        & 0x3f) | 0x80);
+    }
+}
+#else
  #define DECODE_ADD_BINARY_CHAR(c, dst)         \
  do {                                           \
    if (BYTE_ASCII_P (c))                                \
@@ -1661,6 +1872,7 @@ do {                                              \
        Dynarr_add (dst, c);                     \
      }                                          \
  } while (0)
+#endif
  
  #define DECODE_OUTPUT_PARTIAL_CHAR(ch) \
  do {                                   \
@@ -1673,10 +1885,12 @@ do {                                    \
  
  #define DECODE_HANDLE_END_OF_CONVERSION(flags, ch, dst)        \
  do {                                   \
-  DECODE_OUTPUT_PARTIAL_CHAR (ch);     \
-  if ((flags & CODING_STATE_END) &&    \
-      (flags & CODING_STATE_CR))       \
-    Dynarr_add (dst, '\r');            \
+  if (flags & CODING_STATE_END)                \
+    {                                  \
+      DECODE_OUTPUT_PARTIAL_CHAR (ch); \
+      if (flags & CODING_STATE_CR)     \
+       Dynarr_add (dst, '\r');         \
+    }                                  \
  } while (0)
  
  #define DECODING_STREAM_DATA(stream) LSTREAM_TYPE_DATA (stream, decoding)
@@ -1684,7 +1898,7 @@ do {                                      \
  struct decoding_stream
  {
    /* Coding system that governs the conversion. */
-  struct Lisp_Coding_System *codesys;
+  Lisp_Coding_System *codesys;
  
    /* Stream that we read the encoded data from or
       write the decoded data to. */
@@ -1718,6 +1932,9 @@ struct decoding_stream
    /* Additional information (the state of the running CCL program)
       used by the CCL decoder. */
    struct ccl_program ccl;
+
+  /* counter for UTF-8 or UCS-4 */
+  unsigned char counter;
  #endif
    struct detection_state decst;
  };
@@ -1746,7 +1963,7 @@ decoding_marker (Lisp_Object stream, void (*markobj) (Lisp_Object))
       and automatically marked. */
  
    XSETLSTREAM (str_obj, str);
-  (markobj) (str_obj);
+  markobj (str_obj);
    if (str->imp->marker)
      return (str->imp->marker) (str_obj, markobj);
    else
@@ -1852,6 +2069,7 @@ reset_decoding_stream (struct decoding_stream *str)
      {
        setup_ccl_program (&str->ccl, CODING_SYSTEM_CCL_DECODE (str->codesys));
      }
+  str->counter = 0;
  #endif /* MULE */
    str->flags = str->ch = 0;
  }
@@ -1890,9 +2108,11 @@ decoding_closer (Lstream *stream)
      }
    Dynarr_free (str->runoff);
  #ifdef MULE
+#ifdef ENABLE_COMPOSITE_CHARS
    if (str->iso2022.composite_chars)
      Dynarr_free (str->iso2022.composite_chars);
  #endif
+#endif
    return Lstream_close (str->other_end);
  }
  
@@ -1909,7 +2129,7 @@ decoding_stream_coding_system (Lstream *stream)
  void
  set_decoding_stream_coding_system (Lstream *lstr, Lisp_Object codesys)
  {
-  struct Lisp_Coding_System *cs = XCODING_SYSTEM (codesys);
+  Lisp_Coding_System *cs = XCODING_SYSTEM (codesys);
    struct decoding_stream *str = DECODING_STREAM_DATA (lstr);
    str->codesys = cs;
    if (CODING_SYSTEM_EOL_TYPE (cs) != EOL_AUTODETECT)
@@ -2026,8 +2246,15 @@ mule_decode (Lstream *decoding, CONST unsigned char *src,
      case CODESYS_BIG5:
        decode_coding_big5 (decoding, src, dst, n);
        break;
+    case CODESYS_UCS4:
+      decode_coding_ucs4 (decoding, src, dst, n);
+      break;
+    case CODESYS_UTF8:
+      decode_coding_utf8 (decoding, src, dst, n);
+      break;
      case CODESYS_CCL:
-      ccl_driver (&str->ccl, src, dst, n, 0);
+      str->ccl.last_block = str->flags & CODING_STATE_END;
+      ccl_driver (&str->ccl, src, dst, n, 0, CCL_MODE_DECODING);
        break;
      case CODESYS_ISO2022:
        decode_coding_iso2022 (decoding, src, dst, n);
@@ -2117,7 +2344,7 @@ BUFFER defaults to the current buffer if unspecified.
  struct encoding_stream
  {
    /* Coding system that governs the conversion. */
-  struct Lisp_Coding_System *codesys;
+  Lisp_Coding_System *codesys;
  
    /* Stream that we read the encoded data from or
       write the decoded data to. */
@@ -2192,7 +2419,7 @@ encoding_marker (Lisp_Object stream, void (*markobj) (Lisp_Object))
       and automatically marked. */
  
    XSETLSTREAM (str_obj, str);
-  (markobj) (str_obj);
+  markobj (str_obj);
    if (str->imp->marker)
      return (str->imp->marker) (str_obj, markobj);
    else
@@ -2305,7 +2532,11 @@ reset_encoding_stream (struct encoding_stream *str)
         str->iso2022.register_right = 1;
         str->iso2022.current_charset = Qnil;
         str->iso2022.current_half = 0;
+#ifdef UTF2000
+       str->iso2022.current_char_boundary = 0;
+#else
         str->iso2022.current_char_boundary = 1;
+#endif
         break;
        }
      case CODESYS_CCL:
@@ -2368,7 +2599,7 @@ encoding_stream_coding_system (Lstream *stream)
  void
  set_encoding_stream_coding_system (Lstream *lstr, Lisp_Object codesys)
  {
-  struct Lisp_Coding_System *cs = XCODING_SYSTEM (codesys);
+  Lisp_Coding_System *cs = XCODING_SYSTEM (codesys);
    struct encoding_stream *str = ENCODING_STREAM_DATA (lstr);
    str->codesys = cs;
    reset_encoding_stream (str);
@@ -2432,8 +2663,15 @@ mule_encode (Lstream *encoding, CONST unsigned char *src,
      case CODESYS_BIG5:
        encode_coding_big5 (encoding, src, dst, n);
        break;
+    case CODESYS_UCS4:
+      encode_coding_ucs4 (encoding, src, dst, n);
+      break;
+    case CODESYS_UTF8:
+      encode_coding_utf8 (encoding, src, dst, n);
+      break;
      case CODESYS_CCL:
-      ccl_driver (&str->ccl, src, dst, n, 0);
+      str->ccl.last_block = str->flags & CODING_STATE_END;
+      ccl_driver (&str->ccl, src, dst, n, 0, CCL_MODE_ENCODING);
        break;
      case CODESYS_ISO2022:
        encode_coding_iso2022 (encoding, src, dst, n);
@@ -2517,9 +2755,9 @@ text.  BUFFER defaults to the current buffer if unspecified.
  
  /* Shift-JIS is a coding system encoding three character sets: ASCII, right
     half of JISX0201-Kana, and JISX0208.  An ASCII character is encoded
-   as is.  A character of JISX0201-Kana (TYPE94 character set) is
+   as is.  A character of JISX0201-Kana (DIMENSION1_CHARS94 character set) is
     encoded by "position-code + 0x80".  A character of JISX0208
-   (TYPE94x94 character set) is encoded in 2-byte but two
+   (DIMENSION2_CHARS94 character set) is encoded in 2-byte but two
     position-codes are divided and shifted so that it fit in the range
     below.
  
@@ -2576,12 +2814,10 @@ decode_coding_sjis (Lstream *decoding, CONST unsigned char *src,
                     unsigned_char_dynarr *dst, unsigned int n)
  {
    unsigned char c;
-  unsigned int flags, ch;
-  enum eol_type eol_type;
    struct decoding_stream *str = DECODING_STREAM_DATA (decoding);
-
-  CODING_STREAM_DECOMPOSE (str, flags, ch);
-  eol_type = str->eol_type;
+  unsigned int flags  = str->flags;
+  unsigned int ch     = str->ch;
+  eol_type_t eol_type = str->eol_type;
  
    while (n--)
      {
@@ -2594,10 +2830,16 @@ decode_coding_sjis (Lstream *decoding, CONST unsigned char *src,
             {
               unsigned char e1, e2;
  
-             Dynarr_add (dst, LEADING_BYTE_JAPANESE_JISX0208);
               DECODE_SJIS (ch, c, e1, e2);
+#ifdef UTF2000
+             DECODE_ADD_UCS_CHAR(MAKE_CHAR(Vcharset_japanese_jisx0208,
+                                           e1 & 0x7F,
+                                           e2 & 0x7F), dst);
+#else
+             Dynarr_add (dst, LEADING_BYTE_JAPANESE_JISX0208);
               Dynarr_add (dst, e1);
               Dynarr_add (dst, e2);
+#endif
             }
           else
             {
@@ -2613,8 +2855,13 @@ decode_coding_sjis (Lstream *decoding, CONST unsigned char *src,
             ch = c;
           else if (BYTE_SJIS_KATAKANA_P (c))
             {
+#ifdef UTF2000
+             DECODE_ADD_UCS_CHAR(MAKE_CHAR(Vcharset_katakana_jisx0201,
+                                           c & 0x7F, 0), dst);
+#else
               Dynarr_add (dst, LEADING_BYTE_KATAKANA_JISX0201);
               Dynarr_add (dst, c);
+#endif
             }
           else
             DECODE_ADD_BINARY_CHAR (c, dst);
@@ -2624,7 +2871,8 @@ decode_coding_sjis (Lstream *decoding, CONST unsigned char *src,
  
    DECODE_HANDLE_END_OF_CONVERSION (flags, ch, dst);
  
-  CODING_STREAM_COMPOSE (str, flags, ch);
+  str->flags = flags;
+  str->ch    = ch;
  }
  
  /* Convert internally-formatted data to Shift-JIS. */
@@ -2635,15 +2883,85 @@ encode_coding_sjis (Lstream *encoding, CONST unsigned char *src,
  {
    unsigned char c;
    struct encoding_stream *str = ENCODING_STREAM_DATA (encoding);
-  unsigned int flags, ch;
-  enum eol_type eol_type;
-
-  CODING_STREAM_DECOMPOSE (str, flags, ch);
-  eol_type = CODING_SYSTEM_EOL_TYPE (str->codesys);
+  unsigned int flags  = str->flags;
+  unsigned int ch     = str->ch;
+  eol_type_t eol_type = CODING_SYSTEM_EOL_TYPE (str->codesys);
+#ifdef UTF2000
+  unsigned char char_boundary = str->iso2022.current_char_boundary;
+#endif
  
    while (n--)
      {
        c = *src++;
+#ifdef UTF2000
+      switch (char_boundary)
+       {
+       case 0:
+         if ( c >= 0xfc )
+           {
+             ch = c & 0x01;
+             char_boundary = 5;
+           }
+         else if ( c >= 0xf8 )
+           {
+             ch = c & 0x03;
+             char_boundary = 4;
+           }
+         else if ( c >= 0xf0 )
+           {
+             ch = c & 0x07;
+             char_boundary = 3;
+           }
+         else if ( c >= 0xe0 )
+           {
+             ch = c & 0x0f;
+             char_boundary = 2;
+           }
+         else if ( c >= 0xc0 )
+           {
+             ch = c & 0x1f;
+             char_boundary = 1;
+           }
+         else
+           {
+             ch = 0;
+             if (c == '\n')
+               {
+                 if (eol_type != EOL_LF && eol_type != EOL_AUTODETECT)
+                   Dynarr_add (dst, '\r');
+                 if (eol_type != EOL_CR)
+                   Dynarr_add (dst, c);
+               }
+             else
+               Dynarr_add (dst, c);
+             char_boundary = 0;
+           }
+         break;
+       case 1:
+         ch = ( ch << 6 ) | ( c & 0x3f );
+         {
+           Lisp_Object charset;
+           unsigned int c1, c2, s1, s2;
+           
+           BREAKUP_CHAR (ch, charset, c1, c2);
+           if (EQ(charset, Vcharset_katakana_jisx0201))
+             {
+               Dynarr_add (dst, c1 | 0x80);
+             }
+           else if (EQ(charset, Vcharset_japanese_jisx0208))
+             {
+               ENCODE_SJIS (c1 | 0x80, c2 | 0x80, s1, s2);
+               Dynarr_add (dst, s1);
+               Dynarr_add (dst, s2);
+             }
+         }
+         char_boundary = 0;
+         break;
+       default:
+         ch = ( ch << 6 ) | ( c & 0x3f );
+         char_boundary--;
+       }
+#else
        if (c == '\n')
         {
           if (eol_type != EOL_LF && eol_type != EOL_AUTODETECT)
@@ -2680,9 +2998,14 @@ encode_coding_sjis (Lstream *encoding, CONST unsigned char *src,
               ch = 0;
             }
         }
+#endif
      }
  
-  CODING_STREAM_COMPOSE (str, flags, ch);
+  str->flags = flags;
+  str->ch    = ch;
+#ifdef UTF2000
+  str->iso2022.current_char_boundary = char_boundary;
+#endif
  }
  
  DEFUN ("decode-shift-jis-char", Fdecode_shift_jis_char, 1, 1, 0, /*
@@ -2748,8 +3071,8 @@ Return the corresponding character code in SHIFT-JIS as a cons of two bytes.
  
     Since the number of characters in Big5 is larger than maximum
     characters in Emacs' charset (96x96), it can't be handled as one
-   charset.  So, in Emacs, Big5 is devided into two: `charset-big5-1'
-   and `charset-big5-2'.  Both <type>s are TYPE94x94.  The former
+   charset.  So, in Emacs, Big5 is divided into two: `charset-big5-1'
+   and `charset-big5-2'.  Both <type>s are DIMENSION2_CHARS94.  The former
     contains frequently used characters and the latter contains less
     frequently used characters.  */
  
@@ -2865,12 +3188,10 @@ decode_coding_big5 (Lstream *decoding, CONST unsigned char *src,
                     unsigned_char_dynarr *dst, unsigned int n)
  {
    unsigned char c;
-  unsigned int flags, ch;
-  enum eol_type eol_type;
    struct decoding_stream *str = DECODING_STREAM_DATA (decoding);
-
-  CODING_STREAM_DECOMPOSE (str, flags, ch);
-  eol_type = str->eol_type;
+  unsigned int flags  = str->flags;
+  unsigned int ch     = str->ch;
+  eol_type_t eol_type = str->eol_type;
  
    while (n--)
      {
@@ -2906,7 +3227,8 @@ decode_coding_big5 (Lstream *decoding, CONST unsigned char *src,
  
    DECODE_HANDLE_END_OF_CONVERSION (flags, ch, dst);
  
-  CODING_STREAM_COMPOSE (str, flags, ch);
+  str->flags = flags;
+  str->ch    = ch;
  }
  
  /* Convert internally-formatted data to Big5. */
@@ -2915,13 +3237,12 @@ static void
  encode_coding_big5 (Lstream *encoding, CONST unsigned char *src,
                     unsigned_char_dynarr *dst, unsigned int n)
  {
+#ifndef UTF2000
    unsigned char c;
    struct encoding_stream *str = ENCODING_STREAM_DATA (encoding);
-  unsigned int flags, ch;
-  enum eol_type eol_type;
-
-  CODING_STREAM_DECOMPOSE (str, flags, ch);
-  eol_type = CODING_SYSTEM_EOL_TYPE (str->codesys);
+  unsigned int flags  = str->flags;
+  unsigned int ch     = str->ch;
+  eol_type_t eol_type = CODING_SYSTEM_EOL_TYPE (str->codesys);
  
    while (n--)
      {
@@ -2969,7 +3290,9 @@ encode_coding_big5 (Lstream *encoding, CONST unsigned char *src,
        ch = 0;
      }
  
-  CODING_STREAM_COMPOSE (str, flags, ch);
+  str->flags = flags;
+  str->ch    = ch;
+#endif
  }
  
  
@@ -2990,7 +3313,7 @@ Return the corresponding character.
    if (BYTE_BIG5_TWO_BYTE_1_P (b1) &&
        BYTE_BIG5_TWO_BYTE_2_P (b2))
      {
-      int leading_byte;
+      Charset_ID leading_byte;
        Lisp_Object charset;
        DECODE_BIG5 (b1, b2, leading_byte, c1, c2);
        charset = CHARSET_BY_LEADING_BYTE (leading_byte);
@@ -3024,133 +3347,856 @@ Return the corresponding character code in Big5.
  
  \f
  /************************************************************************/
-/*                           ISO2022 methods                            */
+/*                           UCS-4 methods                              */
+/*                                                                      */
+/*  UCS-4 character codes are implemented as nonnegative integers.      */
+/*                                                                      */
  /************************************************************************/
  
-/* The following note describes the coding system ISO2022 briefly.
-   Since the intention of this note is to help understanding of the
-   programs in this file, some parts are NOT ACCURATE or OVERLY
-   SIMPLIFIED.  For thorough understanding, please refer to the
-   original document of ISO2022.
+/* Direct-indexed map from a UCS code below 65536 to a Mule character.
+   Written by Fset_ucs_char and read by ucs_to_char.  */
+Lisp_Object ucs_to_mule_table[65536];
+/* Reverse map (Mule character -> UCS code).  It is only accessed through
+   Fput_char_table and Fget_char_table in this section.  */
+Lisp_Object mule_to_ucs_table;
  
-   ISO2022 provides many mechanisms to encode several character sets
-   in 7-bit and 8-bit environments.  If one chooses 7-bit environment,
-   all text is encoded by codes of less than 128.  This may make the
-   encoded text a little bit longer, but the text get more stability
-   to pass through several gateways (some of them strip off MSB).
+DEFUN ("set-ucs-char", Fset_ucs_char, 2, 2, 0, /*
+Map UCS-4 code CODE to Mule character CHARACTER.
  
-   There are two kind of character sets: control character set and
-   graphic character set.  The former contains control characters such
-   as `newline' and `escape' to provide control functions (control
-   functions are provided also by escape sequence).  The latter
-   contains graphic characters such as 'A' and '-'.  Emacs recognizes
-   two control character sets and many graphic character sets.
+Return T on success, NIL on failure.
+*/
+       (code, character))
+{
+  unsigned int c;
  
-   Graphic character sets are classified into one of four types,
-   according to the dimension and number of characters in the set:
-   TYPE94, TYPE96, TYPE94x94, and TYPE96x96.  In addition, each
-   character set is assigned an identification byte, unique for each
-   type, called "final character" (denoted as <F> hereafter).  The <F>
-   of each character set is decided by ECMA(*) when it is registered
-   in ISO.  Code range of <F> is 0x30..0x7F (0x30..0x3F are for
-   private use only).
+  CHECK_CHAR (character);
+  CHECK_INT (code);
+  c = XINT (code);
  
-   Note (*): ECMA = European Computer Manufacturers Association
+  if (c < sizeof (ucs_to_mule_table))
+    {
+      ucs_to_mule_table[c] = character;
+      return Qt;
+    }
+  else
+    return Qnil;
+}
  
-   Here are examples of graphic character set [NAME(<F>)]:
-       o TYPE94 -- ASCII('B'), right-half-of-JISX0201('I'), ...
-       o TYPE96 -- right-half-of-ISO8859-1('A'), ...
-       o TYPE94x94 -- GB2312('A'), JISX0208('B'), ...
-       o TYPE96x96 -- none for the moment
+/* Map UCS code CODE to a Mule character object.
+
+   Codes that index into ucs_to_mule_table are answered from the table.
+   Codes in the window starting at 0xe00000 are decomposed into a
+   94x94 charset (selected by final byte, counting up from '@') plus a
+   row/column position.  Any other code yields Qnil.  */
+static Lisp_Object
+ucs_to_char (unsigned long code)
+{
+  /* Bound by element count, not by sizeof in bytes; the byte size is
+     larger than the number of slots and would permit reads past the
+     end of the table.  */
+  if (code < sizeof (ucs_to_mule_table) / sizeof (ucs_to_mule_table[0]))
+    {
+      return ucs_to_mule_table[code];
+    }
+  /* NOTE(review): the inclusive upper bound admits one code past the
+     14th charset block -- confirm whether `<' was intended.  */
+  else if ((0xe00000 <= code) && (code <= 0xe00000 + 94 * 94 * 14))
+    {
+      unsigned int c;
+
+      code -= 0xe00000;
+      c = code % (94 * 94);
+      return make_char
+       (MAKE_CHAR (CHARSET_BY_ATTRIBUTES
+                   (CHARSET_TYPE_94X94, code / (94 * 94) + '@',
+                    CHARSET_LEFT_TO_RIGHT),
+                   c / 94 + 33, c % 94 + 33));
+    }
+  else
+    return Qnil;
+}
  
-   A code area (1byte=8bits) is divided into 4 areas, C0, GL, C1, and GR.
-       C0 [0x00..0x1F] -- control character plane 0
-       GL [0x20..0x7F] -- graphic character plane 0
-       C1 [0x80..0x9F] -- control character plane 1
-       GR [0xA0..0xFF] -- graphic character plane 1
+/* Lisp-visible wrapper around ucs_to_char: CODE is validated with
+   CHECK_NATNUM and the lookup result is returned unchanged.  */
+DEFUN ("ucs-char", Fucs_char, 1, 1, 0, /*
+Return Mule character corresponding to UCS code CODE (a positive integer).
+*/
+       (code))
+{
+  CHECK_NATNUM (code);
+  return ucs_to_char (XINT (code));
+}
  
-   A control character set is directly designated and invoked to C0 or
-   C1 by an escape sequence.  The most common case is that:
-   - ISO646's  control character set is designated/invoked to C0, and
-   - ISO6429's control character set is designated/invoked to C1,
-   and usually these designations/invocations are omitted in encoded
-   text.  In a 7-bit environment, only C0 can be used, and a control
-   character for C1 is encoded by an appropriate escape sequence to
-   fit into the environment.  All control characters for C1 are
-   defined to have corresponding escape sequences.
+/* Lisp-visible setter for the Mule-character -> UCS direction: stores
+   CODE under CHARACTER in mule_to_ucs_table and returns whatever
+   Fput_char_table returns.  */
+DEFUN ("set-char-ucs", Fset_char_ucs, 2, 2, 0, /*
+Map Mule character CHARACTER to UCS code CODE (a positive integer).
+*/
+       (character, code))
+{
+  /* #### Isn't this gilding the lily?  Fput_char_table checks its args.
+          Fset_char_ucs is more restrictive on index arg, but should
+          check code arg in a char_table method. */
+  CHECK_CHAR (character);
+  CHECK_NATNUM (code);
+  return Fput_char_table (character, code, mule_to_ucs_table);
+}
  
-   A graphic character set is at first designated to one of four
-   graphic registers (G0 through G3), then these graphic registers are
-   invoked to GL or GR.  These designations and invocations can be
-   done independently.  The most common case is that G0 is invoked to
-   GL, G1 is invoked to GR, and ASCII is designated to G0.  Usually
-   these invocations and designations are omitted in encoded text.
-   In a 7-bit environment, only GL can be used.
+/* Lisp-visible getter for the Mule-character -> UCS direction.  No
+   argument check is done here; CHARACTER is handed straight to
+   Fget_char_table together with mule_to_ucs_table.  */
+DEFUN ("char-ucs", Fchar_ucs, 1, 1, 0, /*
+Return the UCS code (a positive integer) corresponding to CHARACTER.
+*/
+       (character))
+{
+  return Fget_char_table (character, mule_to_ucs_table);
+}
  
-   When a graphic character set of TYPE94 or TYPE94x94 is invoked to
-   GL, codes 0x20 and 0x7F of the GL area work as control characters
-   SPACE and DEL respectively, and code 0xA0 and 0xFF of GR area
-   should not be used.
+#ifdef UTF2000
+#define decode_ucs4 DECODE_ADD_UCS_CHAR
+#else
+/* Decode a UCS-4 character into a buffer.  If the lookup fails, use
+   <GETA MARK> (U+3013) of JIS X 0208, which means correct character
+   is not found, instead.
+   #### do something more appropriate (use blob?)
+        Danger, Will Robinson!  Data loss.  Should we signal user? */
+static void
+decode_ucs4 (unsigned long ch, unsigned_char_dynarr *dst)
+{
+  Lisp_Object chr = ucs_to_char (ch);
  
-   There are two ways of invocation: locking-shift and single-shift.
-   With locking-shift, the invocation lasts until the next different
-   invocation, whereas with single-shift, the invocation works only
-   for the following character and doesn't affect locking-shift.
-   Invocations are done by the following control characters or escape
-   sequences.
+  if (! NILP (chr))
+    {
+      Bufbyte work[MAX_EMCHAR_LEN];
+      int len;
+
+      ch = XCHAR (chr);
+      len = (ch < 128) ?
+       simple_set_charptr_emchar (work, ch) :
+       non_ascii_set_charptr_emchar (work, ch);
+      Dynarr_add_many (dst, work, len);
+    }
+  else
+    {
+      Dynarr_add (dst, LEADING_BYTE_JAPANESE_JISX0208);
+      Dynarr_add (dst, 34 + 128);
+      Dynarr_add (dst, 46 + 128);
+    }
+}
+#endif
  
-   ----------------------------------------------------------------------
-   abbrev  function                 cntrl escape seq   description
-   ----------------------------------------------------------------------
-   SI/LS0  (shift-in)               0x0F  none         invoke G0 into GL
-   SO/LS1  (shift-out)              0x0E  none         invoke G1 into GL
-   LS1R    (locking-shift-1 right)   none  ESC '~'      invoke G1 into GR
-   LS2     (locking-shift-2)        none  ESC 'n'      invoke G2 into GL
-   LS2R    (locking-shift-2 right)   none  ESC '}'      invoke G2 into GR
-   LS3     (locking-shift-3)        none  ESC 'o'      invoke G3 into GL
-   LS3R    (locking-shift 3 right)   none  ESC '|'      invoke G3 into GR
-   SS2     (single-shift-2)         0x8E  ESC 'N'      invoke G2 for one char
-   SS3     (single-shift-3)         0x8F  ESC 'O'      invoke G3 for one char
-   ----------------------------------------------------------------------
-   The first four are for locking-shift.  Control characters for these
-   functions are defined by macros ISO_CODE_XXX in `coding.h'.
+/* Return the UCS-4 code for the character of CHARSET whose position
+   bytes are H and L (only their low 7 bits are used).
+
+   An integer entry in mule_to_ucs_table wins.  Otherwise a
+   two-dimensional 94-character charset whose final byte lies in
+   ['@', 0x7f) is mapped into the block starting at 0xe00000.
+   Everything else yields '?'.  */
+static unsigned long
+mule_char_to_ucs4 (Lisp_Object charset,
+                  unsigned char h, unsigned char l)
+{
+  Lisp_Object mapped
+    = Fget_char_table (make_char (MAKE_CHAR (charset, h & 127, l & 127)),
+                      mule_to_ucs_table);
+  unsigned char final;
+  int row, column;
  
-   Designations are done by the following escape sequences.
-   ----------------------------------------------------------------------
-   escape sequence     description
-   ----------------------------------------------------------------------
-   ESC '(' <F>         designate TYPE94<F> to G0
-   ESC ')' <F>         designate TYPE94<F> to G1
-   ESC '*' <F>         designate TYPE94<F> to G2
-   ESC '+' <F>         designate TYPE94<F> to G3
-   ESC ',' <F>         designate TYPE96<F> to G0 (*)
-   ESC '-' <F>         designate TYPE96<F> to G1
-   ESC '.' <F>         designate TYPE96<F> to G2
-   ESC '/' <F>         designate TYPE96<F> to G3
-   ESC '$' '(' <F>     designate TYPE94x94<F> to G0 (**)
-   ESC '$' ')' <F>     designate TYPE94x94<F> to G1
-   ESC '$' '*' <F>     designate TYPE94x94<F> to G2
-   ESC '$' '+' <F>     designate TYPE94x94<F> to G3
-   ESC '$' ',' <F>     designate TYPE96x96<F> to G0 (*)
-   ESC '$' '-' <F>     designate TYPE96x96<F> to G1
-   ESC '$' '.' <F>     designate TYPE96x96<F> to G2
-   ESC '$' '/' <F>     designate TYPE96x96<F> to G3
-   ----------------------------------------------------------------------
-   In this list, "TYPE94<F>" means a graphic character set of type TYPE94
-   and final character <F>, and etc.
+  /* An explicit table entry takes precedence.  */
+  if (INTP (mapped))
+    return XINT (mapped);
+
+  /* Only 94x94 charsets have a computed fallback.  */
+  if (XCHARSET_DIMENSION (charset) != 2 ||
+      XCHARSET_CHARS (charset) != 94)
+    return '?';
  
-   Note (*): Although these designations are not allowed in ISO2022,
-   Emacs accepts them on decoding, and produces them on encoding
-   TYPE96 or TYPE96x96 character set in a coding system which is
-   characterized as 7-bit environment, non-locking-shift, and
-   non-single-shift.
+  final = XCHARSET_FINAL (charset);
+  if (final < '@' || final >= 0x7f)
+    return '?';
+
+  /* One 94*94 block per final byte, counted from '@'.  */
+  row = (h & 127) - 33;
+  column = (l & 127) - 33;
+  return 0xe00000 + (final - '@') * 94 * 94 + row * 94 + column;
+}
  
-   Note (**): If <F> is '@', 'A', or 'B', the intermediate character
-   '(' can be omitted.  We call this as "short-form" here after.
+/* Append the UCS-4 code of the character (CHARSET, H, L) to DST as
+   four bytes, most significant byte first.  */
+static void
+encode_ucs4 (Lisp_Object charset,
+            unsigned char h, unsigned char l, unsigned_char_dynarr *dst)
+{
+  unsigned long ucs = mule_char_to_ucs4 (charset, h, l);
+  int shift;
+
+  /* The top byte is passed unmasked, exactly as before.  */
+  Dynarr_add (dst, ucs >> 24);
+  for (shift = 16; shift >= 0; shift -= 8)
+    {
+      Dynarr_add (dst, (ucs >> shift) & 255);
+    }
+}
  
-   Now you may notice that there are a lot of ways for encoding the
+/* Detection heuristic for UCS-4.  st->ucs4.in_byte tracks the position
+   inside the current 4-byte unit.  The data is rejected (0 is
+   returned) as soon as the first byte of a unit has its high bit set;
+   otherwise CODING_CATEGORY_UCS4_MASK is returned.  */
+static int
+detect_coding_ucs4 (struct detection_state *st, CONST unsigned char *src,
+                   unsigned int n)
+{
+  while (n--)
+    {
+      int byte = *src++;
+
+      /* First byte of a unit must be below 128.  */
+      if (st->ucs4.in_byte == 0 && byte >= 128)
+       return 0;
+
+      /* Advance within the unit, wrapping after the fourth byte.  */
+      if (st->ucs4.in_byte == 3)
+       st->ucs4.in_byte = 0;
+      else
+       st->ucs4.in_byte++;
+    }
+  return CODING_CATEGORY_UCS4_MASK;
+}
+
+/* Decode N bytes of big-endian UCS-4 from SRC into DST.
+
+   `counter' is the number of bytes still missing from the current
+   4-byte unit (0 means "at a unit boundary") and `ch' accumulates the
+   code.  Both, together with `flags', are loaded from and stored back
+   to the stream state, so a unit split across calls is resumed.  */
+static void
+decode_coding_ucs4 (Lstream *decoding, CONST unsigned char *src,
+                   unsigned_char_dynarr *dst, unsigned int n)
+{
+  struct decoding_stream *str = DECODING_STREAM_DATA (decoding);
+  unsigned int flags = str->flags;
+  unsigned int ch    = str->ch;
+  unsigned char counter = str->counter;
+
+  while (n--)
+    {
+      unsigned char c = *src++;
+      switch (counter)
+       {
+       case 0:
+         /* First byte of a new unit.  */
+         ch = c;
+         counter = 3;
+         break;
+       case 1:
+         /* Last byte: the unit is complete, emit it.  */
+         decode_ucs4 ( ( ch << 8 ) | c, dst);
+         ch = 0;
+         counter = 0;
+         break;
+       default:
+         ch = ( ch << 8 ) | c;
+         counter--;
+       }
+    }
+  /* The end-of-conversion bit lives in `flags' (as tested in
+     decode_coding_utf8), not in the byte counter.  */
+  if (flags & CODING_STATE_END)
+    DECODE_OUTPUT_PARTIAL_CHAR (ch);
+
+  str->flags = flags;
+  str->ch    = ch;
+  str->counter = counter;
+}
+
+/* Convert N bytes of internally-formatted text at SRC to UCS-4,
+   appending the result to DST via encode_ucs4.
+
+   When UTF2000 is defined the whole body is compiled out and the
+   function does nothing.  Otherwise the running state (flags, pending
+   byte `ch', char_boundary, current charset) is loaded from the stream
+   at entry and stored back at exit, so a character split across calls
+   is resumed on the next call.  */
+static void
+encode_coding_ucs4 (Lstream *encoding, CONST unsigned char *src,
+                   unsigned_char_dynarr *dst, unsigned int n)
+{
+#ifndef UTF2000
+  struct encoding_stream *str = ENCODING_STREAM_DATA (encoding);
+  unsigned int flags = str->flags;
+  unsigned int ch = str->ch;
+  unsigned char char_boundary = str->iso2022.current_char_boundary;
+  Lisp_Object charset = str->iso2022.current_charset;
+
+#ifdef ENABLE_COMPOSITE_CHARS
+  /* flags for handling composite chars.  We do a little switcharoo
+     on the source while we're outputting the composite char. */
+  unsigned int saved_n = 0;
+  CONST unsigned char *saved_src = NULL;
+  int in_composite = 0;
+
+ back_to_square_n:
+#endif
+
+  while (n--)
+    {
+      unsigned char c = *src++;
+
+      if (BYTE_ASCII_P (c))
+       {               /* Processing ASCII character */
+         ch = 0;
+         encode_ucs4 (Vcharset_ascii, c, 0, dst);
+         char_boundary = 1;
+       }
+      else if (BUFBYTE_LEADING_BYTE_P (c) || BUFBYTE_LEADING_BYTE_P (ch))
+       { /* Processing Leading Byte */
+         ch = 0;
+         charset = CHARSET_BY_LEADING_BYTE (c);
+         /* A prefix leading byte is remembered in ch so that the next
+            byte is also routed through this branch.  */
+         if (LEADING_BYTE_PREFIX_P(c))
+           ch = c;
+         char_boundary = 0;
+       }
+      else
+       {                       /* Processing Non-ASCII character */
+         char_boundary = 1;
+         if (EQ (charset, Vcharset_control_1))
+           {
+             encode_ucs4 (Vcharset_control_1, c, 0, dst);
+           }
+         else
+           {
+             /* Dispatch on the byte length of the charset's internal
+                representation.  */
+             switch (XCHARSET_REP_BYTES (charset))
+               {
+               case 2:
+                 encode_ucs4 (charset, c, 0, dst);
+                 break;
+               case 3:
+                 if (XCHARSET_PRIVATE_P (charset))
+                   {
+                     encode_ucs4 (charset, c, 0, dst);
+                     ch = 0;
+                   }
+                 else if (ch)
+                   {
+#ifdef ENABLE_COMPOSITE_CHARS
+                     if (EQ (charset, Vcharset_composite))
+                       {
+                         if (in_composite)
+                           {
+                             /* #### Bother! We don't know how to
+                                handle this yet. */
+                             Dynarr_add (dst, 0);
+                             Dynarr_add (dst, 0);
+                             Dynarr_add (dst, 0);
+                             Dynarr_add (dst, '~');
+                           }
+                         else
+                           {
+                             /* Redirect src/n to the composite's
+                                component string; the saved values are
+                                restored after the loop drains it.  */
+                             Emchar emch = MAKE_CHAR (Vcharset_composite,
+                                                      ch & 0x7F, c & 0x7F);
+                             Lisp_Object lstr = composite_char_string (emch);
+                             saved_n = n;
+                             saved_src = src;
+                             in_composite = 1;
+                             src = XSTRING_DATA   (lstr);
+                             n   = XSTRING_LENGTH (lstr);
+                           }
+                       }
+                     else
+#endif /* ENABLE_COMPOSITE_CHARS */
+                       {
+                         encode_ucs4(charset, ch, c, dst);
+                       }
+                     ch = 0;
+                   }
+                 else
+                   {
+                     /* First position byte: hold it until the second
+                        one arrives.  */
+                     ch = c;
+                     char_boundary = 0;
+                   }
+                 break;
+               case 4:
+                 if (ch)
+                   {
+                     encode_ucs4 (charset, ch, c, dst);
+                     ch = 0;
+                   }
+                 else
+                   {
+                     ch = c;
+                     char_boundary = 0;
+                   }
+                 break;
+               default:
+                 abort ();
+               }
+           }
+       }
+    }
+
+#ifdef ENABLE_COMPOSITE_CHARS
+  if (in_composite)
+    {
+      /* Component string exhausted: resume the original input.  */
+      n = saved_n;
+      src = saved_src;
+      in_composite = 0;
+      goto back_to_square_n; /* Wheeeeeeeee ..... */
+    }
+#endif /* ENABLE_COMPOSITE_CHARS */
+
+  str->flags = flags;
+  str->ch = ch;
+  str->iso2022.current_char_boundary = char_boundary;
+  str->iso2022.current_charset = charset;
+
+  /* Verbum caro factum est! */
+#endif
+}
+
+\f
+/************************************************************************/
+/*                           UTF-8 methods                              */
+/************************************************************************/
+
+/* Detection heuristic for UTF-8.  st->utf8.in_byte counts the
+   continuation bytes still expected.  Returns 0 (not UTF-8) on ESC,
+   SI or SO at a character start, on a stray byte in 0x80..0xbf at a
+   character start, or on a malformed continuation byte; otherwise
+   returns CODING_CATEGORY_UTF8_MASK.  */
+static int
+detect_coding_utf8 (struct detection_state *st, CONST unsigned char *src,
+                   unsigned int n)
+{
+  while (n--)
+    {
+      unsigned char byte = *src++;
+
+      if (st->utf8.in_byte != 0)
+       {
+         /* Inside a sequence: only 10xxxxxx is acceptable.  */
+         if ((byte & 0xc0) != 0x80)
+           return 0;
+         st->utf8.in_byte--;
+         continue;
+       }
+
+      /* At a character start.  */
+      if (byte == ISO_CODE_ESC || byte == ISO_CODE_SI || byte == ISO_CODE_SO)
+       return 0;
+
+      if (byte >= 0xfc)
+       st->utf8.in_byte = 5;
+      else if (byte >= 0xf8)
+       st->utf8.in_byte = 4;
+      else if (byte >= 0xf0)
+       st->utf8.in_byte = 3;
+      else if (byte >= 0xe0)
+       st->utf8.in_byte = 2;
+      else if (byte >= 0xc0)
+       st->utf8.in_byte = 1;
+      else if (byte >= 0x80)
+       return 0;
+    }
+  return CODING_CATEGORY_UTF8_MASK;
+}
+
+/* Decode N bytes of UTF-8 from SRC into DST.
+
+   `counter' is the number of continuation bytes still expected for
+   the current character (0 means "at a character start") and `ch'
+   accumulates the code.  Lead bytes up to the 6-byte form (0xfc) are
+   accepted.  flags, ch and counter are loaded from and stored back to
+   the stream state so a sequence split across calls is resumed.  */
+static void
+decode_coding_utf8 (Lstream *decoding, CONST unsigned char *src,
+                   unsigned_char_dynarr *dst, unsigned int n)
+{
+  struct decoding_stream *str = DECODING_STREAM_DATA (decoding);
+  unsigned int flags  = str->flags;
+  unsigned int ch     = str->ch;
+  eol_type_t eol_type = str->eol_type;
+  unsigned char counter = str->counter;
+
+  while (n--)
+    {
+      unsigned char c = *src++;
+      switch (counter)
+       {
+       case 0:
+         /* Classify the lead byte: keep its payload bits and note how
+            many continuation bytes follow.  */
+         if ( c >= 0xfc )
+           {
+             ch = c & 0x01;
+             counter = 5;
+           }
+         else if ( c >= 0xf8 )
+           {
+             ch = c & 0x03;
+             counter = 4;
+           }
+         else if ( c >= 0xf0 )
+           {
+             ch = c & 0x07;
+             counter = 3;
+           }
+         else if ( c >= 0xe0 )
+           {
+             ch = c & 0x0f;
+             counter = 2;
+           }
+         else if ( c >= 0xc0 )
+           {
+             ch = c & 0x1f;
+             counter = 1;
+           }
+         else
+           {
+             /* Every byte below 0xc0 lands here, including a stray
+                0x80..0xbf, and is decoded as a single code.  */
+             DECODE_HANDLE_EOL_TYPE (eol_type, c, flags, dst);
+             decode_ucs4 (c, dst);
+           }
+         break;
+       case 1:
+         /* Final continuation byte: the character is complete.  The
+            top two bits of c are not validated here.  */
+         ch = ( ch << 6 ) | ( c & 0x3f );
+         decode_ucs4 (ch, dst);
+         ch = 0;
+         counter = 0;
+         break;
+       default:
+         ch = ( ch << 6 ) | ( c & 0x3f );
+         counter--;
+       }
+    /* NOTE(review): nothing in this function jumps here directly;
+       presumably DECODE_HANDLE_EOL_TYPE does -- confirm before
+       removing the label.  */
+    label_continue_loop:;
+    }
+
+  if (flags & CODING_STATE_END)
+    DECODE_OUTPUT_PARTIAL_CHAR (ch);
+
+  str->flags = flags;
+  str->ch    = ch;
+  str->counter = counter;
+}
+
+#ifndef UTF2000
+/* Append to DST the UTF-8 encoding of the character (CHARSET, H, L),
+   using the code returned by mule_char_to_ucs4.  Sequences of one to
+   six bytes are produced depending on the magnitude of the code.  */
+static void
+encode_utf8 (Lisp_Object charset,
+            unsigned char h, unsigned char l, unsigned_char_dynarr *dst)
+{
+  unsigned long ucs = mule_char_to_ucs4 (charset, h, l);
+  unsigned char lead;  /* marker bits of the first byte */
+  int trail;           /* number of continuation bytes */
+
+  if (ucs <= 0x7f)
+    {
+      /* Single-byte form: the code is emitted as-is.  */
+      Dynarr_add (dst, ucs);
+      return;
+    }
+
+  if (ucs <= 0x7ff)
+    {
+      lead = 0xc0;
+      trail = 1;
+    }
+  else if (ucs <= 0xffff)
+    {
+      lead = 0xe0;
+      trail = 2;
+    }
+  else if (ucs <= 0x1fffff)
+    {
+      lead = 0xf0;
+      trail = 3;
+    }
+  else if (ucs <= 0x3ffffff)
+    {
+      lead = 0xf8;
+      trail = 4;
+    }
+  else
+    {
+      lead = 0xfc;
+      trail = 5;
+    }
+
+  /* First byte: the bits above the continuation payload, plus marker.  */
+  Dynarr_add (dst, (ucs >> (6 * trail)) | lead);
+
+  /* Continuation bytes, six payload bits each, high group first.  */
+  while (trail-- > 0)
+    {
+      Dynarr_add (dst, ((ucs >> (6 * trail)) & 0x3f) | 0x80);
+    }
+}
+#endif
+
+/* Convert N bytes of internally-formatted text at SRC to UTF-8,
+   appending the result to DST.
+
+   With UTF2000 defined, bytes are copied through unchanged; the only
+   translation is of '\n' at a character start according to eol_type,
+   and char_boundary counts the continuation bytes still to copy.
+   Without UTF2000, leading-byte/position-byte input is parsed and each
+   completed character is emitted through encode_utf8.
+
+   The running state is loaded from the stream at entry and stored back
+   at exit so a character split across calls is resumed.  */
+static void
+encode_coding_utf8 (Lstream *encoding, CONST unsigned char *src,
+                   unsigned_char_dynarr *dst, unsigned int n)
+{
+  struct encoding_stream *str = ENCODING_STREAM_DATA (encoding);
+  unsigned int flags  = str->flags;
+  unsigned int ch     = str->ch;
+  eol_type_t eol_type = CODING_SYSTEM_EOL_TYPE (str->codesys);
+  unsigned char char_boundary = str->iso2022.current_char_boundary;
+#ifdef UTF2000
+
+  while (n--)
+    {
+      unsigned char c = *src++;          
+      switch (char_boundary)
+       {
+       case 0:
+         /* At a character start: the lead byte determines how many
+            continuation bytes will be copied verbatim.  */
+         if ( c >= 0xfc )
+           {
+             Dynarr_add (dst, c);
+             char_boundary = 5;
+           }
+         else if ( c >= 0xf8 )
+           {
+             Dynarr_add (dst, c);
+             char_boundary = 4;
+           }
+         else if ( c >= 0xf0 )
+           {
+             Dynarr_add (dst, c);
+             char_boundary = 3;
+           }
+         else if ( c >= 0xe0 )
+           {
+             Dynarr_add (dst, c);
+             char_boundary = 2;
+           }
+         else if ( c >= 0xc0 )
+           {
+             Dynarr_add (dst, c);
+             char_boundary = 1;
+           }
+         else
+           {
+             if (c == '\n')
+               {
+                 /* Emit CR for every eol type other than LF and
+                    AUTODETECT, and LF for every type other than CR.  */
+                 if (eol_type != EOL_LF && eol_type != EOL_AUTODETECT)
+                   Dynarr_add (dst, '\r');
+                 if (eol_type != EOL_CR)
+                   Dynarr_add (dst, c);
+               }
+             else
+               Dynarr_add (dst, c);
+             char_boundary = 0;
+           }
+         break;
+       case 1:
+         Dynarr_add (dst, c);
+         char_boundary = 0;
+         break;
+       default:
+         Dynarr_add (dst, c);
+         char_boundary--;
+       }
+    }
+#else /* not UTF2000 */
+  Lisp_Object charset = str->iso2022.current_charset;
+
+#ifdef ENABLE_COMPOSITE_CHARS
+  /* flags for handling composite chars.  We do a little switcharoo
+     on the source while we're outputting the composite char. */
+  unsigned int saved_n = 0;
+  CONST unsigned char *saved_src = NULL;
+  int in_composite = 0;
+
+ back_to_square_n:
+#endif /* ENABLE_COMPOSITE_CHARS */
+  
+  while (n--)
+    {
+      unsigned char c = *src++;
+
+      if (BYTE_ASCII_P (c))
+       {               /* Processing ASCII character */
+         ch = 0;
+         if (c == '\n')
+           {
+             if (eol_type != EOL_LF && eol_type != EOL_AUTODETECT)
+               Dynarr_add (dst, '\r');
+             if (eol_type != EOL_CR)
+               Dynarr_add (dst, c);
+           }
+         else
+           encode_utf8 (Vcharset_ascii, c, 0, dst);
+         char_boundary = 1;
+       }
+      else if (BUFBYTE_LEADING_BYTE_P (c) || BUFBYTE_LEADING_BYTE_P (ch))
+       { /* Processing Leading Byte */
+         ch = 0;
+         charset = CHARSET_BY_LEADING_BYTE (c);
+         /* A prefix leading byte is remembered in ch so that the next
+            byte is also routed through this branch.  */
+         if (LEADING_BYTE_PREFIX_P(c))
+           ch = c;
+         char_boundary = 0;
+       }
+      else
+       {                       /* Processing Non-ASCII character */
+         char_boundary = 1;
+         if (EQ (charset, Vcharset_control_1))
+           {
+             encode_utf8 (Vcharset_control_1, c, 0, dst);
+           }
+         else
+           {
+             /* Dispatch on the byte length of the charset's internal
+                representation.  */
+             switch (XCHARSET_REP_BYTES (charset))
+               {
+               case 2:
+                 encode_utf8 (charset, c, 0, dst);
+                 break;
+               case 3:
+                 if (XCHARSET_PRIVATE_P (charset))
+                   {
+                     encode_utf8 (charset, c, 0, dst);
+                     ch = 0;
+                   }
+                 else if (ch)
+                   {
+#ifdef ENABLE_COMPOSITE_CHARS
+                     if (EQ (charset, Vcharset_composite))
+                       {
+                         if (in_composite)
+                           {
+                             /* #### Bother! We don't know how to
+                                handle this yet. */
+                             encode_utf8 (Vcharset_ascii, '~', 0, dst);
+                           }
+                         else
+                           {
+                             /* Redirect src/n to the composite's
+                                component string; the saved values are
+                                restored after the loop drains it.  */
+                             Emchar emch = MAKE_CHAR (Vcharset_composite,
+                                                      ch & 0x7F, c & 0x7F);
+                             Lisp_Object lstr = composite_char_string (emch);
+                             saved_n = n;
+                             saved_src = src;
+                             in_composite = 1;
+                             src = XSTRING_DATA   (lstr);
+                             n   = XSTRING_LENGTH (lstr);
+                           }
+                       }
+                     else
+#endif /* ENABLE_COMPOSITE_CHARS */
+                       {
+                         encode_utf8 (charset, ch, c, dst);
+                       }
+                     ch = 0;
+                   }
+                 else
+                   {
+                     /* First position byte: hold it until the second
+                        one arrives.  */
+                     ch = c;
+                     char_boundary = 0;
+                   }
+                 break;
+               case 4:
+                 if (ch)
+                   {
+                     encode_utf8 (charset, ch, c, dst);
+                     ch = 0;
+                   }
+                 else
+                   {
+                     ch = c;
+                     char_boundary = 0;
+                   }
+                 break;
+               default:
+                 abort ();
+               }
+           }
+       }
+    }
+
+#ifdef ENABLE_COMPOSITE_CHARS
+  if (in_composite)
+    {
+      /* Component string exhausted: resume the original input.  */
+      n = saved_n;
+      src = saved_src;
+      in_composite = 0;
+      goto back_to_square_n; /* Wheeeeeeeee ..... */
+    }
+#endif
+
+#endif /* not UTF2000 */
+  str->flags = flags;
+  str->ch    = ch;
+  str->iso2022.current_char_boundary = char_boundary;
+#ifndef UTF2000
+  str->iso2022.current_charset = charset;
+#endif
+
+  /* Verbum caro factum est! */
+}
+
+\f
+/************************************************************************/
+/*                           ISO2022 methods                            */
+/************************************************************************/
+
+/* The following note describes the coding system ISO2022 briefly.
+   Since the intention of this note is to help understand the
+   functions in this file, some parts are NOT ACCURATE or OVERLY
+   SIMPLIFIED.  For thorough understanding, please refer to the
+   original document of ISO2022.
+
+   ISO2022 provides many mechanisms to encode several character sets
+   in 7-bit and 8-bit environments.  For 7-bit environments, all text
+   is encoded using bytes less than 128.  This may make the encoded
+   text a little bit longer, but the text passes more easily through
+   several gateways, some of which strip off MSB (Most Significant Bit).
+
+   There are two kinds of character sets: control character set and
+   graphic character set.  The former contains control characters such
+   as `newline' and `escape' to provide control functions (control
+   functions are also provided by escape sequences).  The latter
+   contains graphic characters such as 'A' and '-'.  Emacs recognizes
+   two control character sets and many graphic character sets.
+
+   Graphic character sets are classified into one of the following
+   four classes, according to the number of bytes (DIMENSION) and
+   number of characters in one dimension (CHARS) of the set:
+   - DIMENSION1_CHARS94
+   - DIMENSION1_CHARS96
+   - DIMENSION2_CHARS94
+   - DIMENSION2_CHARS96
+
+   In addition, each character set is assigned an identification tag,
+   unique for each set, called "final character" (denoted as <F>
+   hereafter).  The <F> of each character set is decided by ECMA(*)
+   when it is registered in ISO.  The code range of <F> is 0x30..0x7F
+   (0x30..0x3F are for private use only).
+
+   Note (*): ECMA = European Computer Manufacturers Association
+
+   Here are examples of graphic character set [NAME(<F>)]:
+       o DIMENSION1_CHARS94 -- ASCII('B'), right-half-of-JISX0201('I'), ...
+       o DIMENSION1_CHARS96 -- right-half-of-ISO8859-1('A'), ...
+       o DIMENSION2_CHARS94 -- GB2312('A'), JISX0208('B'), ...
+       o DIMENSION2_CHARS96 -- none for the moment
+
+   A code area (1 byte = 8 bits) is divided into 4 areas, C0, GL, C1, and GR.
+       C0 [0x00..0x1F] -- control character plane 0
+       GL [0x20..0x7F] -- graphic character plane 0
+       C1 [0x80..0x9F] -- control character plane 1
+       GR [0xA0..0xFF] -- graphic character plane 1
+
+   A control character set is directly designated and invoked to C0 or
+   C1 by an escape sequence.  The most common case is that:
+   - ISO646's  control character set is designated/invoked to C0, and
+   - ISO6429's control character set is designated/invoked to C1,
+   and usually these designations/invocations are omitted in encoded
+   text.  In a 7-bit environment, only C0 can be used, and a control
+   character for C1 is encoded by an appropriate escape sequence to
+   fit into the environment.  All control characters for C1 are
+   defined to have corresponding escape sequences.
+
+   A graphic character set is at first designated to one of four
+   graphic registers (G0 through G3), then these graphic registers are
+   invoked to GL or GR.  These designations and invocations can be
+   done independently.  The most common case is that G0 is invoked to
+   GL, G1 is invoked to GR, and ASCII is designated to G0.  Usually
+   these invocations and designations are omitted in encoded text.
+   In a 7-bit environment, only GL can be used.
+
+   When a graphic character set of CHARS94 is invoked to GL, codes
+   0x20 and 0x7F of the GL area work as control characters SPACE and
+   DEL respectively, and codes 0xA0 and 0xFF of the GR area should not
+   be used.
+
+   There are two ways of invocation: locking-shift and single-shift.
+   With locking-shift, the invocation lasts until the next different
+   invocation, whereas with single-shift, the invocation affects the
+   following character only and doesn't affect the locking-shift
+   state.  Invocations are done by the following control characters or
+   escape sequences:
+
+   ----------------------------------------------------------------------
+   abbrev  function                 cntrl escape seq   description
+   ----------------------------------------------------------------------
+   SI/LS0  (shift-in)               0x0F  none         invoke G0 into GL
+   SO/LS1  (shift-out)              0x0E  none         invoke G1 into GL
+   LS2     (locking-shift-2)        none  ESC 'n'      invoke G2 into GL
+   LS3     (locking-shift-3)        none  ESC 'o'      invoke G3 into GL
+   LS1R    (locking-shift-1 right)  none  ESC '~'      invoke G1 into GR (*)
+   LS2R    (locking-shift-2 right)  none  ESC '}'      invoke G2 into GR (*)
+   LS3R    (locking-shift-3 right)  none  ESC '|'      invoke G3 into GR (*)
+   SS2     (single-shift-2)         0x8E  ESC 'N'      invoke G2 for one char
+   SS3     (single-shift-3)         0x8F  ESC 'O'      invoke G3 for one char
+   ----------------------------------------------------------------------
+   (*) These are not used by any known coding system.
+
+   Control characters for these functions are defined by macros
+   ISO_CODE_XXX in `coding.h'.
+
+   Designations are done by the following escape sequences:
+   ----------------------------------------------------------------------
+   escape sequence     description
+   ----------------------------------------------------------------------
+   ESC '(' <F>         designate DIMENSION1_CHARS94<F> to G0
+   ESC ')' <F>         designate DIMENSION1_CHARS94<F> to G1
+   ESC '*' <F>         designate DIMENSION1_CHARS94<F> to G2
+   ESC '+' <F>         designate DIMENSION1_CHARS94<F> to G3
+   ESC ',' <F>         designate DIMENSION1_CHARS96<F> to G0 (*)
+   ESC '-' <F>         designate DIMENSION1_CHARS96<F> to G1
+   ESC '.' <F>         designate DIMENSION1_CHARS96<F> to G2
+   ESC '/' <F>         designate DIMENSION1_CHARS96<F> to G3
+   ESC '$' '(' <F>     designate DIMENSION2_CHARS94<F> to G0 (**)
+   ESC '$' ')' <F>     designate DIMENSION2_CHARS94<F> to G1
+   ESC '$' '*' <F>     designate DIMENSION2_CHARS94<F> to G2
+   ESC '$' '+' <F>     designate DIMENSION2_CHARS94<F> to G3
+   ESC '$' ',' <F>     designate DIMENSION2_CHARS96<F> to G0 (*)
+   ESC '$' '-' <F>     designate DIMENSION2_CHARS96<F> to G1
+   ESC '$' '.' <F>     designate DIMENSION2_CHARS96<F> to G2
+   ESC '$' '/' <F>     designate DIMENSION2_CHARS96<F> to G3
+   ----------------------------------------------------------------------
+
+   In this list, "DIMENSION1_CHARS94<F>" means a graphic character set
+   of dimension 1, chars 94, and final character <F>, etc...
+
+   Note (*): Although these designations are not allowed in ISO2022,
+   Emacs accepts them on decoding, and produces them on encoding
+   CHARS96 character sets in a coding system which is characterized as
+   7-bit environment, non-locking-shift, and non-single-shift.
+
+   Note (**): If <F> is '@', 'A', or 'B', the intermediate character
+   '(' can be omitted.  We refer to this as "short-form" hereafter.
+
+   Now you may notice that there are a lot of ways for encoding the
     same multilingual text in ISO2022.  Actually, there exist many
-   coding systems such as Compound Text (used in X's inter client
+   coding systems such as Compound Text (used in X11's inter client
     communication, ISO-2022-JP (used in Japanese internet), ISO-2022-KR
     (used in Korean internet), EUC (Extended UNIX Code, used in Asian
     localized platforms), and all of these are variants of ISO2022.
@@ -3159,19 +4205,19 @@ Return the corresponding character code in Big5.
     sequences: ISO6429's direction specification and Emacs' private
     sequence for specifying character composition.
  
-   ISO6429's direction specification takes the following format:
+   ISO6429's direction specification takes the following form:
         o CSI ']'      -- end of the current direction
         o CSI '0' ']'  -- end of the current direction
         o CSI '1' ']'  -- start of left-to-right text
         o CSI '2' ']'  -- start of right-to-left text
     The control character CSI (0x9B: control sequence introducer) is
-   abbreviated to the escape sequence ESC '[' in 7-bit environment.
+   abbreviated to the escape sequence ESC '[' in a 7-bit environment.
  
-   Character composition specification takes the following format:
+   Character composition specification takes the following form:
         o ESC '0' -- start character composition
         o ESC '1' -- end character composition
-   Since these are not standard escape sequences of any ISO, the use
-   of them for these meanings is restricted to Emacs only.  */
+   Since these are not standard escape sequences of any ISO standard,
+   their use with these meanings is restricted to Emacs only.  */
  
  static void
  reset_iso2022 (Lisp_Object coding_system, struct iso2022_decoder *iso)
@@ -3195,8 +4241,10 @@ reset_iso2022 (Lisp_Object coding_system, struct iso2022_decoder *iso)
    iso->invalid_switch_dir = 0;
    iso->output_direction_sequence = 0;
    iso->output_literally = 0;
+#ifdef ENABLE_COMPOSITE_CHARS
    if (iso->composite_chars)
      Dynarr_reset (iso->composite_chars);
+#endif
  }
  
  static int
@@ -3324,6 +4372,7 @@ parse_iso2022_esc (Lisp_Object codesys, struct iso2022_decoder *iso,
           reg = 3; half = 1;
           goto locking_shift;
  
+#ifdef ENABLE_COMPOSITE_CHARS
           /**** composite ****/
  
         case '0':
@@ -3337,6 +4386,7 @@ parse_iso2022_esc (Lisp_Object codesys, struct iso2022_decoder *iso,
           *flags = (*flags & CODING_STATE_ISO2022_LOCK) &
             ~CODING_STATE_COMPOSITE;
           return 1;
+#endif /* ENABLE_COMPOSITE_CHARS */
  
           /**** directionality ****/
  
@@ -3593,11 +4643,15 @@ static int
  detect_coding_iso2022 (struct detection_state *st, CONST unsigned char *src,
                        unsigned int n)
  {
-  int c;
    int mask;
  
    /* #### There are serious deficiencies in the recognition mechanism
-     here.  This needs to be much smarter if it's going to cut it. */
+     here.  This needs to be much smarter if it's going to cut it.
+     The sequence "\xff\x0f" is currently detected as LOCK_SHIFT while
+     it should be detected as Latin-1.
+     All the ISO2022 stuff in this file should be synced up with the
+     code from FSF Emacs-20.4, in which Mule should be more or less stable.
+     Perhaps we should wait till R2L works in FSF Emacs? */
  
    if (!st->iso2022.initted)
      {
@@ -3617,7 +4671,7 @@ detect_coding_iso2022 (struct detection_state *st, CONST unsigned char *src,
  
    while (n--)
      {
-      c = *src++;
+      int c = *src++;
        if (c >= 0xA0)
         {
           mask &= ~CODING_CATEGORY_ISO_7_MASK;
@@ -3716,7 +4770,7 @@ postprocess_iso2022_mask (int mask)
     need to handle the CSI differently. */
  
  static void
-restore_left_to_right_direction (struct Lisp_Coding_System *codesys,
+restore_left_to_right_direction (Lisp_Coding_System *codesys,
                                  unsigned_char_dynarr *dst,
                                  unsigned int *flags,
                                  int internal_p)
@@ -3747,7 +4801,7 @@ restore_left_to_right_direction (struct Lisp_Coding_System *codesys,
     need to handle the CSI differently. */
  
  static void
-ensure_correct_direction (int direction, struct Lisp_Coding_System *codesys,
+ensure_correct_direction (int direction, Lisp_Coding_System *codesys,
                           unsigned_char_dynarr *dst, unsigned int *flags,
                           int internal_p)
  {
@@ -3780,23 +4834,25 @@ static void
  decode_coding_iso2022 (Lstream *decoding, CONST unsigned char *src,
                        unsigned_char_dynarr *dst, unsigned int n)
  {
-  unsigned char c;
-  unsigned int flags, ch;
-  enum eol_type eol_type;
    struct decoding_stream *str = DECODING_STREAM_DATA (decoding);
-  Lisp_Object coding_system;
+  unsigned int flags  = str->flags;
+  unsigned int ch     = str->ch;
+  eol_type_t eol_type = str->eol_type;
+#ifdef ENABLE_COMPOSITE_CHARS
    unsigned_char_dynarr *real_dst = dst;
+#endif
+  Lisp_Object coding_system;
  
-  CODING_STREAM_DECOMPOSE (str, flags, ch);
-  eol_type = str->eol_type;
    XSETCODING_SYSTEM (coding_system, str->codesys);
  
+#ifdef ENABLE_COMPOSITE_CHARS
    if (flags & CODING_STATE_COMPOSITE)
      dst = str->iso2022.composite_chars;
+#endif /* ENABLE_COMPOSITE_CHARS */
  
    while (n--)
      {
-      c = *src++;
+      unsigned char c = *src++;
        if (flags & CODING_STATE_ESCAPE)
         {       /* Within ESC sequence */
           int retval = parse_iso2022_esc (coding_system, &str->iso2022,
@@ -3806,6 +4862,7 @@ decode_coding_iso2022 (Lstream *decoding, CONST unsigned char *src,
             {
               switch (str->iso2022.esc)
                 {
+#ifdef ENABLE_COMPOSITE_CHARS
                 case ISO_ESC_START_COMPOSITE:
                   if (str->iso2022.composite_chars)
                     Dynarr_reset (str->iso2022.composite_chars);
@@ -3824,6 +4881,7 @@ decode_coding_iso2022 (Lstream *decoding, CONST unsigned char *src,
                     Dynarr_add_many (dst, comstr, len);
                     break;
                   }
+#endif /* ENABLE_COMPOSITE_CHARS */
  
                 case ISO_ESC_LITERAL:
                   DECODE_ADD_BINARY_CHAR (c, dst);
@@ -3898,7 +4956,9 @@ decode_coding_iso2022 (Lstream *decoding, CONST unsigned char *src,
        else
         {                       /* Graphic characters */
           Lisp_Object charset;
-         int lb;
+#ifndef UTF2000
+         Charset_ID lb;
+#endif
           int reg;
  
           DECODE_HANDLE_EOL_TYPE (eol_type, c, flags, dst);
@@ -3911,7 +4971,8 @@ decode_coding_iso2022 (Lstream *decoding, CONST unsigned char *src,
           charset = str->iso2022.charset[reg];
  
           /* Error checking: */
-         if (NILP (charset) || str->iso2022.invalid_designated[reg]
+         if (! CHARSETP (charset)
+             || str->iso2022.invalid_designated[reg]
               || (((c & 0x7F) == ' ' || (c & 0x7F) == ISO_CODE_DEL)
                   && XCHARSET_CHARS (charset) == 94))
             /* Mrmph.  We are trying to invoke a register that has no
@@ -3940,6 +5001,22 @@ decode_coding_iso2022 (Lstream *decoding, CONST unsigned char *src,
                     charset = new_charset;
                 }
  
+#ifdef UTF2000
+             if (XCHARSET_DIMENSION (charset) == 1)
+               {
+                 DECODE_OUTPUT_PARTIAL_CHAR (ch);
+                 DECODE_ADD_UCS_CHAR
+                   (MAKE_CHAR (charset, c & 0x7F, 0), dst);
+               }
+             else if (ch)
+               {
+                 DECODE_ADD_UCS_CHAR
+                   (MAKE_CHAR (charset, ch & 0x7F, c & 0x7F), dst);
+                 ch = 0;
+               }
+             else
+               ch = c;
+#else
               lb = XCHARSET_LEADING_BYTE (charset);
               switch (XCHARSET_REP_BYTES (charset))
                 {
@@ -3988,6 +5065,7 @@ decode_coding_iso2022 (Lstream *decoding, CONST unsigned char *src,
                   else
                     ch = c;
                 }
+#endif
             }
  
           if (!ch)
@@ -4000,7 +5078,8 @@ decode_coding_iso2022 (Lstream *decoding, CONST unsigned char *src,
    if (flags & CODING_STATE_END)
      DECODE_OUTPUT_PARTIAL_CHAR (ch);
  
-  CODING_STREAM_COMPOSE (str, flags, ch);
+  str->flags = flags;
+  str->ch    = ch;
  }
  
  
@@ -4012,7 +5091,8 @@ static void
  iso2022_designate (Lisp_Object charset, unsigned char reg,
                    struct encoding_stream *str, unsigned_char_dynarr *dst)
  {
-  CONST char *inter94 = "()*+", *inter96= ",-./";
+  static CONST char inter94[] = "()*+";
+  static CONST char inter96[] = ",-./";
    unsigned int type;
    unsigned char final;
    Lisp_Object old_charset = str->iso2022.charset[reg];
@@ -4100,28 +5180,244 @@ encode_coding_iso2022 (Lstream *encoding, CONST unsigned char *src,
                        unsigned_char_dynarr *dst, unsigned int n)
  {
    unsigned char charmask, c;
-  unsigned int flags, ch;
-  enum eol_type eol_type;
    unsigned char char_boundary;
    struct encoding_stream *str = ENCODING_STREAM_DATA (encoding);
-  struct Lisp_Coding_System *codesys = str->codesys;
+  unsigned int flags          = str->flags;
+  Emchar ch                   = str->ch;
+  Lisp_Coding_System *codesys = str->codesys;
+  eol_type_t eol_type         = CODING_SYSTEM_EOL_TYPE (str->codesys);
    int i;
    Lisp_Object charset;
    int half;
+#ifdef UTF2000
+  unsigned int byte1, byte2;
+#endif
  
+#ifdef ENABLE_COMPOSITE_CHARS
    /* flags for handling composite chars.  We do a little switcharoo
       on the source while we're outputting the composite char. */
    unsigned int saved_n = 0;
    CONST unsigned char *saved_src = NULL;
    int in_composite = 0;
+#endif /* ENABLE_COMPOSITE_CHARS */
  
-  CODING_STREAM_DECOMPOSE (str, flags, ch);
-  eol_type = CODING_SYSTEM_EOL_TYPE (str->codesys);
    char_boundary = str->iso2022.current_char_boundary;
    charset = str->iso2022.current_charset;
    half = str->iso2022.current_half;
  
+#ifdef ENABLE_COMPOSITE_CHARS
   back_to_square_n:
+#endif
+#ifdef UTF2000
+  while (n--)
+    {
+      c = *src++;
+
+      switch (char_boundary)
+       {
+       case 0:
+         if ( c >= 0xfc )
+           {
+             ch = c & 0x01;
+             char_boundary = 5;
+           }
+         else if ( c >= 0xf8 )
+           {
+             ch = c & 0x03;
+             char_boundary = 4;
+           }
+         else if ( c >= 0xf0 )
+           {
+             ch = c & 0x07;
+             char_boundary = 3;
+           }
+         else if ( c >= 0xe0 )
+           {
+             ch = c & 0x0f;
+             char_boundary = 2;
+           }
+         else if ( c >= 0xc0 )
+           {
+             ch = c & 0x1f;
+             char_boundary = 1;
+           }
+         else
+           {
+             ch = 0;
+
+             restore_left_to_right_direction (codesys, dst, &flags, 0);
+             
+             /* Make sure G0 contains ASCII */
+             if ((c > ' ' && c < ISO_CODE_DEL) ||
+                 !CODING_SYSTEM_ISO2022_NO_ASCII_CNTL (codesys))
+               {
+                 ensure_normal_shift (str, dst);
+                 iso2022_designate (Vcharset_ascii, 0, str, dst);
+               }
+             
+             /* If necessary, restore everything to the default state
+                at end-of-line */
+             if (c == '\n' &&
+                 !(CODING_SYSTEM_ISO2022_NO_ASCII_EOL (codesys)))
+               {
+                 restore_left_to_right_direction (codesys, dst, &flags, 0);
+
+                 ensure_normal_shift (str, dst);
+
+                 for (i = 0; i < 4; i++)
+                   {
+                     Lisp_Object initial_charset =
+                       CODING_SYSTEM_ISO2022_INITIAL_CHARSET (codesys, i);
+                     iso2022_designate (initial_charset, i, str, dst);
+                   }
+               }
+             if (c == '\n')
+               {
+                 if (eol_type != EOL_LF && eol_type != EOL_AUTODETECT)
+                   Dynarr_add (dst, '\r');
+                 if (eol_type != EOL_CR)
+                   Dynarr_add (dst, c);
+               }
+             else
+               {
+                 if (CODING_SYSTEM_ISO2022_ESCAPE_QUOTED (codesys)
+                     && fit_to_be_escape_quoted (c))
+                   Dynarr_add (dst, ISO_CODE_ESC);
+                 Dynarr_add (dst, c);
+               }
+             char_boundary = 0;
+           }
+         break;
+       case 1:
+         ch = ( ch << 6 ) | ( c & 0x3f );
+         
+         char_boundary = 0;
+         if ( (0x80 <= ch) && (ch <= 0x9f) )
+           {
+             charmask = (half == 0 ? 0x00 : 0x80);
+         
+             if (CODING_SYSTEM_ISO2022_ESCAPE_QUOTED (codesys)
+                 && fit_to_be_escape_quoted (ch))
+               Dynarr_add (dst, ISO_CODE_ESC);
+             /* you asked for it ... */
+             Dynarr_add (dst, ch);
+           }
+         else
+           {
+             int reg;
+
+             BREAKUP_CHAR (ch, charset, byte1, byte2);
+             ensure_correct_direction (XCHARSET_DIRECTION (charset),
+                                       codesys, dst, &flags, 0);
+
+             /* Now determine which register to use. */
+             reg = -1;
+             for (i = 0; i < 4; i++)
+               {
+                 if (EQ (charset, str->iso2022.charset[i]) ||
+                     EQ (charset,
+                         CODING_SYSTEM_ISO2022_INITIAL_CHARSET (codesys, i)))
+                   {
+                     reg = i;
+                     break;
+                   }
+               }
+             
+             if (reg == -1)
+               {
+                 if (XCHARSET_GRAPHIC (charset) != 0)
+                   {
+                     if (!NILP (str->iso2022.charset[1]) &&
+                         (!CODING_SYSTEM_ISO2022_SEVEN (codesys) ||
+                          CODING_SYSTEM_ISO2022_LOCK_SHIFT (codesys)))
+                       reg = 1;
+                     else if (!NILP (str->iso2022.charset[2]))
+                       reg = 2;
+                     else if (!NILP (str->iso2022.charset[3]))
+                       reg = 3;
+                     else
+                       reg = 0;
+                   }
+                 else
+                   reg = 0;
+               }
+             
+             iso2022_designate (charset, reg, str, dst);
+             
+             /* Now invoke that register. */
+             switch (reg)
+               {
+               case 0:
+                 ensure_normal_shift (str, dst);
+                 half = 0;
+                 break;
+                 
+               case 1:
+                 if (CODING_SYSTEM_ISO2022_SEVEN (codesys))
+                   {
+                     ensure_shift_out (str, dst);
+                     half = 0;
+                   }
+                 else
+                   half = 1;
+                 break;
+                 
+               case 2:
+                 if (CODING_SYSTEM_ISO2022_SEVEN (str->codesys))
+                   {
+                     Dynarr_add (dst, ISO_CODE_ESC);
+                     Dynarr_add (dst, 'N');
+                     half = 0;
+                   }
+                 else
+                   {
+                     Dynarr_add (dst, ISO_CODE_SS2);
+                     half = 1;
+                   }
+                 break;
+                 
+               case 3:
+                 if (CODING_SYSTEM_ISO2022_SEVEN (str->codesys))
+                   {
+                     Dynarr_add (dst, ISO_CODE_ESC);
+                     Dynarr_add (dst, 'O');
+                     half = 0;
+                   }
+                 else
+                   {
+                     Dynarr_add (dst, ISO_CODE_SS3);
+                     half = 1;
+                   }
+                 break;
+                 
+               default:
+                 abort ();
+               }
+             
+             charmask = (half == 0 ? 0x00 : 0x80);
+             
+             switch (XCHARSET_DIMENSION (charset))
+               {
+               case 1:
+                 Dynarr_add (dst, byte1 | charmask);
+                 break;
+               case 2:
+                 Dynarr_add (dst, byte1 | charmask);
+                 Dynarr_add (dst, byte2 | charmask);
+                 break;
+               default:
+                 abort ();
+               }
+           }
+         ch =0;
+         break;
+       default:
+         ch = ( ch << 6 ) | ( c & 0x3f );
+         char_boundary--;
+       }
+    }
+#else /* not UTF2000 */
+
    while (n--)
      {
        c = *src++;
@@ -4180,7 +5476,10 @@ encode_coding_iso2022 (Lstream *encoding, CONST unsigned char *src,
           if (LEADING_BYTE_PREFIX_P(c))
             ch = c;
           else if (!EQ (charset, Vcharset_control_1)
-                  && !EQ (charset, Vcharset_composite))
+#ifdef ENABLE_COMPOSITE_CHARS
+                  && !EQ (charset, Vcharset_composite)
+#endif
+                  )
             {
               int reg;
  
@@ -4300,6 +5599,7 @@ encode_coding_iso2022 (Lstream *encoding, CONST unsigned char *src,
                     }
                   else if (ch)
                     {
+#ifdef ENABLE_COMPOSITE_CHARS
                       if (EQ (charset, Vcharset_composite))
                         {
                           if (in_composite)
@@ -4323,6 +5623,7 @@ encode_coding_iso2022 (Lstream *encoding, CONST unsigned char *src,
                             }
                         }
                       else
+#endif /* ENABLE_COMPOSITE_CHARS */
                         {
                           Dynarr_add (dst, ch & charmask);
                           Dynarr_add (dst, c & charmask);
@@ -4354,7 +5655,9 @@ encode_coding_iso2022 (Lstream *encoding, CONST unsigned char *src,
             }
         }
      }
+#endif /* not UTF2000 */
  
+#ifdef ENABLE_COMPOSITE_CHARS
    if (in_composite)
      {
        n = saved_n;
@@ -4364,8 +5667,13 @@ encode_coding_iso2022 (Lstream *encoding, CONST unsigned char *src,
        Dynarr_add (dst, '1'); /* end composing */
        goto back_to_square_n; /* Wheeeeeeeee ..... */
      }
+#endif /* ENABLE_COMPOSITE_CHARS */
  
+#ifdef UTF2000
+  if ( (char_boundary == 0) && flags & CODING_STATE_END)
+#else
    if (char_boundary && flags & CODING_STATE_END)
+#endif
      {
        restore_left_to_right_direction (codesys, dst, &flags, 0);
        ensure_normal_shift (str, dst);
@@ -4377,7 +5685,8 @@ encode_coding_iso2022 (Lstream *encoding, CONST unsigned char *src,
         }
      }
  
-  CODING_STREAM_COMPOSE (str, flags, ch);
+  str->flags = flags;
+  str->ch    = ch;
    str->iso2022.current_char_boundary = char_boundary;
    str->iso2022.current_charset = charset;
    str->iso2022.current_half = half;
@@ -4398,12 +5707,10 @@ decode_coding_no_conversion (Lstream *decoding, CONST unsigned char *src,
                              unsigned_char_dynarr *dst, unsigned int n)
  {
    unsigned char c;
-  unsigned int flags, ch;
-  enum eol_type eol_type;
    struct decoding_stream *str = DECODING_STREAM_DATA (decoding);
-
-  CODING_STREAM_DECOMPOSE (str, flags, ch);
-  eol_type = str->eol_type;
+  unsigned int flags  = str->flags;
+  unsigned int ch     = str->ch;
+  eol_type_t eol_type = str->eol_type;
  
    while (n--)
      {
@@ -4416,7 +5723,8 @@ decode_coding_no_conversion (Lstream *decoding, CONST unsigned char *src,
  
    DECODE_HANDLE_END_OF_CONVERSION (flags, ch, dst);
  
-  CODING_STREAM_COMPOSE (str, flags, ch);
+  str->flags = flags;
+  str->ch    = ch;
  }
  
  static void
@@ -4425,15 +5733,71 @@ encode_coding_no_conversion (Lstream *encoding, CONST unsigned char *src,
  {
    unsigned char c;
    struct encoding_stream *str = ENCODING_STREAM_DATA (encoding);
-  unsigned int flags, ch;
-  enum eol_type eol_type;
-
-  CODING_STREAM_DECOMPOSE (str, flags, ch);
-  eol_type = CODING_SYSTEM_EOL_TYPE (str->codesys);
+  unsigned int flags  = str->flags;
+  unsigned int ch     = str->ch;
+  eol_type_t eol_type = CODING_SYSTEM_EOL_TYPE (str->codesys);
+#ifdef UTF2000
+  unsigned char char_boundary = str->iso2022.current_char_boundary;
+#endif
  
    while (n--)
      {
-      c = *src++;
+      c = *src++;        
+#ifdef UTF2000
+      switch (char_boundary)
+       {
+       case 0:
+         if ( c >= 0xfc )
+           {
+             ch = c & 0x01;
+             char_boundary = 5;
+           }
+         else if ( c >= 0xf8 )
+           {
+             ch = c & 0x03;
+             char_boundary = 4;
+           }
+         else if ( c >= 0xf0 )
+           {
+             ch = c & 0x07;
+             char_boundary = 3;
+           }
+         else if ( c >= 0xe0 )
+           {
+             ch = c & 0x0f;
+             char_boundary = 2;
+           }
+         else if ( c >= 0xc0 )
+           {
+             ch = c & 0x1f;
+             char_boundary = 1;
+           }
+         else
+           {
+             ch = 0;
+
+             if (c == '\n')
+               {
+                 if (eol_type != EOL_LF && eol_type != EOL_AUTODETECT)
+                   Dynarr_add (dst, '\r');
+                 if (eol_type != EOL_CR)
+                   Dynarr_add (dst, c);
+               }
+             else
+               Dynarr_add (dst, c);
+             char_boundary = 0;
+           }
+         break;
+       case 1:
+         ch = ( ch << 6 ) | ( c & 0x3f );
+         Dynarr_add (dst, ch & 0xff);
+         char_boundary = 0;
+         break;
+       default:
+         ch = ( ch << 6 ) | ( c & 0x3f );
+         char_boundary--;
+       }
+#else /* not UTF2000 */
        if (c == '\n')
         {
           if (eol_type != EOL_LF && eol_type != EOL_AUTODETECT)
@@ -4469,9 +5833,14 @@ encode_coding_no_conversion (Lstream *encoding, CONST unsigned char *src,
              untranslatable character, so ignore it */
           ch = 0;
         }
+#endif /* not UTF2000 */
      }
  
-  CODING_STREAM_COMPOSE (str, flags, ch);
+  str->flags = flags;
+  str->ch    = ch;
+#ifdef UTF2000
+  str->iso2022.current_char_boundary = char_boundary;
+#endif
  }
  
  \f
@@ -4484,24 +5853,27 @@ static Bufbyte_dynarr *conversion_in_dynarr;
  
  /* Determine coding system from coding format */
  
-#define FILE_NAME_CODING_SYSTEM                        \
- ((NILP (Vfile_name_coding_system) ||                  \
-   (EQ ((Vfile_name_coding_system), Qbinary))) ?       \
-  Qnil : Fget_coding_system (Vfile_name_coding_system))
-
  /* #### not correct for all values of `fmt'! */
+static Lisp_Object
+external_data_format_to_coding_system (enum external_data_format fmt)
+{
+  switch (fmt) /* pick the coding system object for FMT, or Qnil */
+    {
+    case FORMAT_FILENAME:
+    case FORMAT_TERMINAL: /* handled exactly like FORMAT_FILENAME */
+      if (EQ (Vfile_name_coding_system, Qnil) ||
+         EQ (Vfile_name_coding_system, Qbinary))
+       return Qnil; /* nil or `binary' both yield Qnil */
+      else
+       return Fget_coding_system (Vfile_name_coding_system);
  #ifdef MULE
-#define FMT_CODING_SYSTEM(fmt)                                 \
- (((fmt) == FORMAT_FILENAME) ? FILE_NAME_CODING_SYSTEM     :   \
-  ((fmt) == FORMAT_CTEXT   ) ? Fget_coding_system (Qctext) :   \
-  ((fmt) == FORMAT_TERMINAL) ? FILE_NAME_CODING_SYSTEM     :   \
-  Qnil)
-#else
-#define FMT_CODING_SYSTEM(fmt)                                 \
- (((fmt) == FORMAT_FILENAME) ? FILE_NAME_CODING_SYSTEM     :   \
-  ((fmt) == FORMAT_TERMINAL) ? FILE_NAME_CODING_SYSTEM     :   \
-  Qnil)
+    case FORMAT_CTEXT: /* only compiled in MULE builds */
+      return Fget_coding_system (Qctext);
  #endif
+    default: /* any other format (FORMAT_CTEXT too without MULE) */
+      return Qnil;
+    }
+}
  
  Extbyte *
  convert_to_external_format (CONST Bufbyte *ptr,
@@ -4509,7 +5881,7 @@ convert_to_external_format (CONST Bufbyte *ptr,
                             Extcount *len_out,
                             enum external_data_format fmt)
  {
-  Lisp_Object coding_system = FMT_CODING_SYSTEM (fmt);
+  Lisp_Object coding_system = external_data_format_to_coding_system (fmt);
  
    if (!conversion_out_dynarr)
      conversion_out_dynarr = Dynarr_new (Extbyte);
@@ -4522,12 +5894,17 @@ convert_to_external_format (CONST Bufbyte *ptr,
  
        for (; ptr < end;)
          {
+#ifdef UTF2000
+          Bufbyte c =
+           (*ptr < 0xc0) ? *ptr :
+           ((*ptr & 0x1f) << 6) | (*(ptr+1) & 0x3f);
+#else
            Bufbyte c =
              (BYTE_ASCII_P (*ptr))                 ? *ptr :
              (*ptr == LEADING_BYTE_CONTROL_1)      ? (*(ptr+1) - 0x20) :
              (*ptr == LEADING_BYTE_LATIN_ISO8859_1) ? (*(ptr+1)) :
              '~';
-
+#endif
            Dynarr_add (conversion_out_dynarr, (Extbyte) c);
            INC_CHARPTR (ptr);
          }
@@ -4577,7 +5954,7 @@ convert_from_external_format (CONST Extbyte *ptr,
                               Bytecount *len_out,
                               enum external_data_format fmt)
  {
-  Lisp_Object coding_system = FMT_CODING_SYSTEM (fmt);
+  Lisp_Object coding_system = external_data_format_to_coding_system (fmt);
  
    if (!conversion_in_dynarr)
      conversion_in_dynarr = Dynarr_new (Bufbyte);
@@ -4634,7 +6011,7 @@ convert_from_external_format (CONST Extbyte *ptr,
  /************************************************************************/
  
  void
-syms_of_mule_coding (void)
+syms_of_file_coding (void)
  {
    defsymbol (&Qbuffer_file_coding_system, "buffer-file-coding-system");
    deferror (&Qcoding_system_error, "coding-system-error",
@@ -4647,6 +6024,7 @@ syms_of_mule_coding (void)
    DEFSUBR (Fcoding_system_name);
    DEFSUBR (Fmake_coding_system);
    DEFSUBR (Fcopy_coding_system);
+  DEFSUBR (Fdefine_coding_system_alias);
    DEFSUBR (Fsubsidiary_coding_system);
  
    DEFSUBR (Fcoding_system_type);
@@ -4670,12 +6048,19 @@ syms_of_mule_coding (void)
    DEFSUBR (Fencode_shift_jis_char);
    DEFSUBR (Fdecode_big5_char);
    DEFSUBR (Fencode_big5_char);
+  DEFSUBR (Fset_ucs_char);
+  DEFSUBR (Fucs_char);
+  DEFSUBR (Fset_char_ucs);
+  DEFSUBR (Fchar_ucs);
  #endif /* MULE */
    defsymbol (&Qcoding_system_p, "coding-system-p");
    defsymbol (&Qno_conversion, "no-conversion");
+  defsymbol (&Qraw_text, "raw-text");
  #ifdef MULE
    defsymbol (&Qbig5, "big5");
    defsymbol (&Qshift_jis, "shift-jis");
+  defsymbol (&Qucs4, "ucs-4");
+  defsymbol (&Qutf8, "utf-8");
    defsymbol (&Qccl, "ccl");
    defsymbol (&Qiso2022, "iso2022");
  #endif /* MULE */
@@ -4719,6 +6104,10 @@ syms_of_mule_coding (void)
              "shift-jis");
    defsymbol (&coding_category_symbol[CODING_CATEGORY_BIG5],
              "big5");
+  defsymbol (&coding_category_symbol[CODING_CATEGORY_UCS4],
+            "ucs-4");
+  defsymbol (&coding_category_symbol[CODING_CATEGORY_UTF8],
+            "utf-8");
    defsymbol (&coding_category_symbol[CODING_CATEGORY_ISO_7],
              "iso-7");
    defsymbol (&coding_category_symbol[CODING_CATEGORY_ISO_8_DESIGNATE],
@@ -4735,7 +6124,7 @@ syms_of_mule_coding (void)
  }
  
  void
-lstream_type_create_mule_coding (void)
+lstream_type_create_file_coding (void)
  {
    LSTREAM_HAS_METHOD (decoding, reader);
    LSTREAM_HAS_METHOD (decoding, writer);
@@ -4755,7 +6144,7 @@ lstream_type_create_mule_coding (void)
  }
  
  void
-vars_of_mule_coding (void)
+vars_of_file_coding (void)
  {
    int i;
  
@@ -4817,11 +6206,11 @@ Setting this to nil does not do anything.
  }
  
  void
-complex_vars_of_mule_coding (void)
+complex_vars_of_file_coding (void)
  {
-  staticpro (&Vcoding_system_hashtable);
-  Vcoding_system_hashtable = make_lisp_hashtable (50, HASHTABLE_NONWEAK,
-                                                 HASHTABLE_EQ);
+  staticpro (&Vcoding_system_hash_table);
+  Vcoding_system_hash_table =
+    make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
  
    the_codesys_prop_dynarr = Dynarr_new (codesys_prop);
  
@@ -4863,13 +6252,43 @@ complex_vars_of_mule_coding (void)
    DEFINE_CODESYS_PROP (CODESYS_PROP_CCL,     Qdecode);
  #endif /* MULE */
    /* Need to create this here or we're really screwed. */
-  Fmake_coding_system (Qno_conversion, Qno_conversion, build_string ("No conversion"),
-                      list2 (Qmnemonic, build_string ("Noconv")));
+  Fmake_coding_system
+    (Qraw_text, Qno_conversion,
+     build_string ("Raw text, which means it converts only line-break-codes."),
+     list2 (Qmnemonic, build_string ("Raw")));
+
+  Fmake_coding_system
+    (Qbinary, Qno_conversion,
+     build_string ("Binary, which means it does not convert anything."),
+     list4 (Qeol_type, Qlf,
+           Qmnemonic, build_string ("Binary")));
+
+#ifdef UTF2000
+  Fmake_coding_system
+    (Qutf8, Qutf8,
+     build_string ("Coding-system of ISO/IEC 10646 UTF-8."),
+     list2 (Qmnemonic, build_string ("UTF8")));
+#endif
  
-  Fcopy_coding_system (Fcoding_system_property (Qno_conversion, Qeol_lf),
-                      Qbinary);
+  Fdefine_coding_system_alias (Qno_conversion, Qraw_text);
  
    /* Need this for bootstrapping */
    coding_category_system[CODING_CATEGORY_NO_CONVERSION] =
-    Fget_coding_system (Qno_conversion);
+    Fget_coding_system (Qraw_text);
+
+#ifdef UTF2000
+  coding_category_system[CODING_CATEGORY_UTF8]
+   = Fget_coding_system (Qutf8);
+#endif
+
+#ifdef MULE
+  {
+    unsigned int i;
+
+    for (i = 0; i < 65536; i++)
+      ucs_to_mule_table[i] = Qnil;
+  }
+  staticpro (&mule_to_ucs_table);
+  mule_to_ucs_table = Fmake_char_table(Qgeneric);
+#endif /* MULE */
  }