X-Git-Url: http://git.chise.org/gitweb/?a=blobdiff_plain;f=src%2Ffile-coding.c;h=3068c896823dac45e334b378d4adf0bf92d709c6;hb=02276b8e7e7b7e647493d52f77beb00d64951836;hp=48363a44a695c183154693e6ad9dac96c05cb4e2;hpb=2e3e3f9ee27fec50f45c282d71eaddf7c673bc56;p=chise%2Fxemacs-chise.git-

diff --git a/src/file-coding.c b/src/file-coding.c
index 48363a4..3068c89 100644
--- a/src/file-coding.c
+++ b/src/file-coding.c
@@ -25,12 +25,14 @@ Boston, MA 02111-1307, USA.  */
 
 #include <config.h>
 #include "lisp.h"
+
 #include "buffer.h"
 #include "elhash.h"
 #include "insdel.h"
 #include "lstream.h"
 #ifdef MULE
 #include "mule-ccl.h"
+#include "chartab.h"
 #endif
 #include "file-coding.h"
 
@@ -54,7 +56,7 @@ int coding_category_by_priority[CODING_CATEGORY_LAST + 1];
 
 Lisp_Object Qcoding_system_p;
 
-Lisp_Object Qno_conversion, Qccl, Qiso2022;
+Lisp_Object Qraw_text, Qno_conversion, Qccl, Qiso2022;
 /* Qinternal in general.c */
 
 Lisp_Object Qmnemonic, Qeol_type;
@@ -64,6 +66,7 @@ Lisp_Object Qpost_read_conversion;
 Lisp_Object Qpre_write_conversion;
 
 #ifdef MULE
+Lisp_Object Qucs4, Qutf8;
 Lisp_Object Qbig5, Qshift_jis;
 Lisp_Object Qcharset_g0, Qcharset_g1, Qcharset_g2, Qcharset_g3;
 Lisp_Object Qforce_g0_on_output, Qforce_g1_on_output;
@@ -75,7 +78,7 @@ Lisp_Object Qshort, Qno_ascii_eol, Qno_ascii_cntl, Qseven, Qlock_shift;
 #endif
 Lisp_Object Qencode, Qdecode;
 
-Lisp_Object Vcoding_system_hashtable;
+Lisp_Object Vcoding_system_hash_table;
 
 int enable_multibyte_characters;
 
@@ -103,8 +106,10 @@ struct iso2022_decoder
   /* Index for next byte to store in ISO escape sequence. */
   int esc_bytes_index;
 
+#ifdef ENABLE_COMPOSITE_CHARS
   /* Stuff seen so far when composing a string. */
   unsigned_char_dynarr *composite_chars;
+#endif
 
   /* If we saw an invalid designation sequence for a particular
      register, we flag it here and switch to ASCII.  The next time we
@@ -166,6 +171,24 @@ static void decode_coding_big5 (Lstream *decoding,
 static void encode_coding_big5 (Lstream *encoding,
 				CONST unsigned char *src,
 				unsigned_char_dynarr *dst, unsigned int n);
+static int detect_coding_ucs4 (struct detection_state *st,
+			       CONST unsigned char *src,
+			       unsigned int n);
+static void decode_coding_ucs4 (Lstream *decoding,
+				CONST unsigned char *src,
+				unsigned_char_dynarr *dst, unsigned int n);
+static void encode_coding_ucs4 (Lstream *encoding,
+				CONST unsigned char *src,
+				unsigned_char_dynarr *dst, unsigned int n);
+static int detect_coding_utf8 (struct detection_state *st,
+			       CONST unsigned char *src,
+			       unsigned int n);
+static void decode_coding_utf8 (Lstream *decoding,
+				CONST unsigned char *src,
+				unsigned_char_dynarr *dst, unsigned int n);
+static void encode_coding_utf8 (Lstream *encoding,
+				CONST unsigned char *src,
+				unsigned_char_dynarr *dst, unsigned int n);
 static int postprocess_iso2022_mask (int mask);
 static void reset_iso2022 (Lisp_Object coding_system,
 			   struct iso2022_decoder *iso);
@@ -222,22 +245,58 @@ static Lisp_Object mark_coding_system (Lisp_Object, void (*) (Lisp_Object));
 static void print_coding_system (Lisp_Object, Lisp_Object, int);
 static void finalize_coding_system (void *header, int for_disksave);
 
+#ifdef MULE
+static const struct lrecord_description ccs_description_1[] = { /* Field table for one charset_conversion_spec. */
+  { XD_LISP_OBJECT, offsetof(charset_conversion_spec, from_charset), 2 }, /* NOTE(review): the 2 looks like a count covering from_charset and to_charset -- confirm */
+  { XD_END }
+};
+
+static const struct struct_description ccs_description = { /* Pairs the element size with its field table. */
+  sizeof(charset_conversion_spec),
+  ccs_description_1
+};
+  
+static const struct lrecord_description ccsd_description_1[] = { /* Field table for the dynarr of conversion specs. */
+  XD_DYNARR_DESC(charset_conversion_spec_dynarr, &ccs_description), /* elements are described by ccs_description */
+  { XD_END }
+};
+
+static const struct struct_description ccsd_description = { /* Referenced by the iso2022.input_conv / output_conv entries. */
+  sizeof(charset_conversion_spec_dynarr),
+  ccsd_description_1
+};
+#endif
+
+static const struct lrecord_description coding_system_description[] = { /* Layout table passed to DEFINE_LRECORD_IMPLEMENTATION for coding-system. */
+  { XD_LISP_OBJECT, offsetof(struct Lisp_Coding_System, name), 2 }, /* NOTE(review): third member looks like a count of consecutive Lisp_Object slots -- confirm */
+  { XD_LISP_OBJECT, offsetof(struct Lisp_Coding_System, mnemonic), 3 }, /* which fields follow mnemonic is not visible here -- TODO confirm */
+  { XD_LISP_OBJECT, offsetof(struct Lisp_Coding_System, eol_lf), 3 }, /* presumably eol_lf, eol_crlf, eol_cr -- confirm */
+#ifdef MULE
+  { XD_LISP_OBJECT, offsetof(struct Lisp_Coding_System, iso2022.initial_charset), 4 }, /* 4 matches the i < 4 loop in mark_coding_system */
+  { XD_STRUCT_PTR,  offsetof(struct Lisp_Coding_System, iso2022.input_conv),  1, &ccsd_description }, /* pointer to a conversion-spec dynarr */
+  { XD_STRUCT_PTR,  offsetof(struct Lisp_Coding_System, iso2022.output_conv), 1, &ccsd_description }, /* same layout as input_conv */
+  { XD_LISP_OBJECT, offsetof(struct Lisp_Coding_System, ccl.decode), 2 }, /* presumably ccl.decode and ccl.encode -- confirm */
+#endif
+  { XD_END }
+};
+
 DEFINE_LRECORD_IMPLEMENTATION ("coding-system", coding_system,
 			       mark_coding_system, print_coding_system,
 			       finalize_coding_system,
-			       0, 0, struct Lisp_Coding_System);
+			       0, 0, coding_system_description,
+			       struct Lisp_Coding_System);
 
 static Lisp_Object
 mark_coding_system (Lisp_Object obj, void (*markobj) (Lisp_Object))
 {
-  struct Lisp_Coding_System *codesys = XCODING_SYSTEM (obj);
+  Lisp_Coding_System *codesys = XCODING_SYSTEM (obj);
 
-  (markobj) (CODING_SYSTEM_NAME (codesys));
-  (markobj) (CODING_SYSTEM_DOC_STRING (codesys));
-  (markobj) (CODING_SYSTEM_MNEMONIC (codesys));
-  (markobj) (CODING_SYSTEM_EOL_LF (codesys));
-  (markobj) (CODING_SYSTEM_EOL_CRLF (codesys));
-  (markobj) (CODING_SYSTEM_EOL_CR (codesys));
+  markobj (CODING_SYSTEM_NAME (codesys));
+  markobj (CODING_SYSTEM_DOC_STRING (codesys));
+  markobj (CODING_SYSTEM_MNEMONIC (codesys));
+  markobj (CODING_SYSTEM_EOL_LF (codesys));
+  markobj (CODING_SYSTEM_EOL_CRLF (codesys));
+  markobj (CODING_SYSTEM_EOL_CR (codesys));
 
   switch (CODING_SYSTEM_TYPE (codesys))
     {
@@ -245,15 +304,15 @@ mark_coding_system (Lisp_Object obj, void (*markobj) (Lisp_Object))
       int i;
     case CODESYS_ISO2022:
       for (i = 0; i < 4; i++)
-	(markobj) (CODING_SYSTEM_ISO2022_INITIAL_CHARSET (codesys, i));
+	markobj (CODING_SYSTEM_ISO2022_INITIAL_CHARSET (codesys, i));
       if (codesys->iso2022.input_conv)
 	{
 	  for (i = 0; i < Dynarr_length (codesys->iso2022.input_conv); i++)
 	    {
 	      struct charset_conversion_spec *ccs =
 		Dynarr_atp (codesys->iso2022.input_conv, i);
-	      (markobj) (ccs->from_charset);
-	      (markobj) (ccs->to_charset);
+	      markobj (ccs->from_charset);
+	      markobj (ccs->to_charset);
 	    }
 	}
       if (codesys->iso2022.output_conv)
@@ -262,22 +321,22 @@ mark_coding_system (Lisp_Object obj, void (*markobj) (Lisp_Object))
 	    {
 	      struct charset_conversion_spec *ccs =
 		Dynarr_atp (codesys->iso2022.output_conv, i);
-	      (markobj) (ccs->from_charset);
-	      (markobj) (ccs->to_charset);
+	      markobj (ccs->from_charset);
+	      markobj (ccs->to_charset);
 	    }
 	}
       break;
 
     case CODESYS_CCL:
-      (markobj) (CODING_SYSTEM_CCL_DECODE (codesys));
-      (markobj) (CODING_SYSTEM_CCL_ENCODE (codesys));
+      markobj (CODING_SYSTEM_CCL_DECODE (codesys));
+      markobj (CODING_SYSTEM_CCL_ENCODE (codesys));
       break;
 #endif /* MULE */
     default:
       break;
     }
 
-  (markobj) (CODING_SYSTEM_PRE_WRITE_CONVERSION (codesys));
+  markobj (CODING_SYSTEM_PRE_WRITE_CONVERSION (codesys));
   return CODING_SYSTEM_POST_READ_CONVERSION (codesys);
 }
 
@@ -285,7 +344,7 @@ static void
 print_coding_system (Lisp_Object obj, Lisp_Object printcharfun,
 		     int escapeflag)
 {
-  struct Lisp_Coding_System *c = XCODING_SYSTEM (obj);
+  Lisp_Coding_System *c = XCODING_SYSTEM (obj);
   if (print_readably)
     error ("printing unreadable object #<coding_system 0x%x>",
 	   c->header.uid);
@@ -298,7 +357,7 @@ print_coding_system (Lisp_Object obj, Lisp_Object printcharfun,
 static void
 finalize_coding_system (void *header, int for_disksave)
 {
-  struct Lisp_Coding_System *c = (struct Lisp_Coding_System *) header;
+  Lisp_Coding_System *c = (Lisp_Coding_System *) header;
   /* Since coding systems never go away, this function is not
      necessary.  But it would be necessary if we changed things
      so that coding systems could go away. */
@@ -344,16 +403,16 @@ eol_type_to_symbol (enum eol_type type)
 {
   switch (type)
     {
+    default: abort ();
     case EOL_LF:         return Qlf;
     case EOL_CRLF:       return Qcrlf;
     case EOL_CR:         return Qcr;
     case EOL_AUTODETECT: return Qnil;
-    default:             abort (); return Qnil; /* not reached */
     }
 }
 
 static void
-setup_eol_coding_systems (struct Lisp_Coding_System *codesys)
+setup_eol_coding_systems (Lisp_Coding_System *codesys)
 {
   Lisp_Object codesys_obj;
   int len = string_length (XSYMBOL (CODING_SYSTEM_NAME (codesys))->name);
@@ -439,7 +498,7 @@ associated coding system object is returned.
   else
     CHECK_SYMBOL (coding_system_or_name);
 
-  return Fgethash (coding_system_or_name, Vcoding_system_hashtable, Qnil);
+  return Fgethash (coding_system_or_name, Vcoding_system_hash_table, Qnil);
 }
 
 DEFUN ("get-coding-system", Fget_coding_system, 1, 1, 0, /*
@@ -465,19 +524,15 @@ struct coding_system_list_closure
 };
 
 static int
-add_coding_system_to_list_mapper (CONST void *hash_key, void *hash_contents,
+add_coding_system_to_list_mapper (Lisp_Object key, Lisp_Object value, /* KEY is not read in the body; VALUE is accessed as a coding system */
 				  void *coding_system_list_closure)
 {
   /* This function can GC */
-  Lisp_Object key, contents;
-  Lisp_Object *coding_system_list;
   struct coding_system_list_closure *cscl =
     (struct coding_system_list_closure *) coding_system_list_closure;
-  CVOID_TO_LISP (key, hash_key);
-  VOID_TO_LISP (contents, hash_contents);
-  coding_system_list = cscl->coding_system_list;
+  Lisp_Object *coding_system_list = cscl->coding_system_list; /* the caller's accumulator list */
 
-  *coding_system_list = Fcons (XCODING_SYSTEM (contents)->name,
+  *coding_system_list = Fcons (XCODING_SYSTEM (value)->name, /* push this system's name onto the front */
 			       *coding_system_list);
   return 0;
 }
@@ -493,7 +548,7 @@ Return a list of the names of all defined coding systems.
 
   GCPRO1 (coding_system_list);
   coding_system_list_closure.coding_system_list = &coding_system_list;
-  elisp_maphash (add_coding_system_to_list_mapper, Vcoding_system_hashtable,
+  elisp_maphash (add_coding_system_to_list_mapper, Vcoding_system_hash_table,
 		 &coding_system_list_closure);
   UNGCPRO;
 
@@ -509,11 +564,11 @@ Return the name of the given coding system.
   return XCODING_SYSTEM_NAME (coding_system);
 }
 
-static struct Lisp_Coding_System *
+static Lisp_Coding_System *
 allocate_coding_system (enum coding_system_type type, Lisp_Object name)
 {
-  struct Lisp_Coding_System *codesys =
-    alloc_lcrecord_type (struct Lisp_Coding_System, lrecord_coding_system);
+  Lisp_Coding_System *codesys =
+    alloc_lcrecord_type (Lisp_Coding_System, &lrecord_coding_system);
 
   zero_lcrecord (codesys);
   CODING_SYSTEM_PRE_WRITE_CONVERSION (codesys) = Qnil;
@@ -612,6 +667,10 @@ nil or 'undecided
      characters will only be present if you explicitly insert them.)
 'shift-jis
      Shift-JIS (a Japanese encoding commonly used in PC operating systems).
+'ucs-4
+     ISO 10646 UCS-4 encoding.
+'utf-8
+     ISO 10646 UTF-8 encoding.
 'iso2022
      Any ISO2022-compliant encoding.  Among other things, this includes
      JIS (the Japanese encoding commonly used for e-mail), EUC (the
@@ -766,7 +825,7 @@ if TYPE is 'ccl:
 */
        (name, type, doc_string, props))
 {
-  struct Lisp_Coding_System *codesys;
+  Lisp_Coding_System *codesys;
   Lisp_Object rest, key, value;
   enum coding_system_type ty;
   int need_to_setup_eol_systems = 1;
@@ -778,6 +837,8 @@ if TYPE is 'ccl:
   else if (EQ (type, Qshift_jis))     { ty = CODESYS_SHIFT_JIS; }
   else if (EQ (type, Qiso2022))       { ty = CODESYS_ISO2022; }
   else if (EQ (type, Qbig5))          { ty = CODESYS_BIG5; }
+  else if (EQ (type, Qucs4))          { ty = CODESYS_UCS4; }
+  else if (EQ (type, Qutf8))          { ty = CODESYS_UTF8; }
   else if (EQ (type, Qccl))           { ty = CODESYS_CCL; }
 #endif
   else if (EQ (type, Qno_conversion)) { ty = CODESYS_NO_CONVERSION; }
@@ -890,7 +951,7 @@ if TYPE is 'ccl:
   {
     Lisp_Object codesys_obj;
     XSETCODING_SYSTEM (codesys_obj, codesys);
-    Fputhash (name, codesys_obj, Vcoding_system_hashtable);
+    Fputhash (name, codesys_obj, Vcoding_system_hash_table);
     return codesys_obj;
   }
 }
@@ -911,12 +972,12 @@ be created.
 			 allocate_coding_system
 			 (XCODING_SYSTEM_TYPE (old_coding_system),
 			  new_name));
-      Fputhash (new_name, new_coding_system, Vcoding_system_hashtable);
+      Fputhash (new_name, new_coding_system, Vcoding_system_hash_table);
     }
 
   {
-    struct Lisp_Coding_System *to = XCODING_SYSTEM (new_coding_system);
-    struct Lisp_Coding_System *from = XCODING_SYSTEM (old_coding_system);
+    Lisp_Coding_System *to = XCODING_SYSTEM (new_coding_system);
+    Lisp_Coding_System *from = XCODING_SYSTEM (old_coding_system);
     memcpy (((char *) to  ) + sizeof (to->header),
 	    ((char *) from) + sizeof (from->header),
 	    sizeof (*from) - sizeof (from->header));
@@ -925,10 +986,44 @@ be created.
   return new_coding_system;
 }
 
+DEFUN ("define-coding-system-alias", Fdefine_coding_system_alias, 2, 2, 0, /*
+Define symbol ALIAS as an alias for coding system CODING-SYSTEM.
+*/
+       (alias, coding_system))
+{
+  CHECK_SYMBOL (alias);
+  if (!NILP (Ffind_coding_system (alias))) /* refuse a symbol that is already registered */
+    signal_simple_error ("Symbol already names a coding system", alias);
+  coding_system = Fget_coding_system (coding_system); /* normalize the argument before storing it */
+  Fputhash (alias, coding_system, Vcoding_system_hash_table); /* ALIAS now maps to the same object */
+
+  /* Set up aliases for subsidiaries.  NOTE(review): ALIAS is already registered if a recursive call below signals -- confirm that is acceptable. */
+  if (XCODING_SYSTEM_EOL_TYPE (coding_system) == EOL_AUTODETECT)
+    {
+      Lisp_Object str;
+      XSETSTRING (str, symbol_name (XSYMBOL (alias))); /* stem for the suffixed alias names */
+#define FROB(type, name)							\
+      do {									\
+	Lisp_Object subsidiary = XCODING_SYSTEM_EOL_##type (coding_system);	\
+	if (!NILP (subsidiary))							\
+	  Fdefine_coding_system_alias						\
+	    (Fintern (concat2 (str, build_string (name)), Qnil), subsidiary);	\
+      } while (0)
+      FROB (LF,   "-unix"); /* ALIAS-unix -> LF subsidiary, when non-nil */
+      FROB (CRLF, "-dos"); /* ALIAS-dos -> CRLF subsidiary, when non-nil */
+      FROB (CR,   "-mac"); /* ALIAS-mac -> CR subsidiary, when non-nil */
+#undef FROB
+    }
+  /* FSF return value is a vector of [ALIAS-unix ALIAS-dos ALIAS-mac],
+     but it doesn't look intentional, so I'd rather return something
+     meaningful or nothing at all. */
+  return Qnil;
+}
+
 static Lisp_Object
 subsidiary_coding_system (Lisp_Object coding_system, enum eol_type type)
 {
-  struct Lisp_Coding_System *cs = XCODING_SYSTEM (coding_system);
+  Lisp_Coding_System *cs = XCODING_SYSTEM (coding_system);
   Lisp_Object new_coding_system;
 
   if (CODING_SYSTEM_EOL_TYPE (cs) != EOL_AUTODETECT)
@@ -978,22 +1073,21 @@ Return the type of CODING-SYSTEM.
 {
   switch (XCODING_SYSTEM_TYPE (Fget_coding_system (coding_system)))
     {
+    default: abort ();
     case CODESYS_AUTODETECT:	return Qundecided;
 #ifdef MULE
     case CODESYS_SHIFT_JIS:	return Qshift_jis;
     case CODESYS_ISO2022:	return Qiso2022;
     case CODESYS_BIG5:		return Qbig5;
+    case CODESYS_UCS4:		return Qucs4;
+    case CODESYS_UTF8:		return Qutf8;
     case CODESYS_CCL:		return Qccl;
 #endif
     case CODESYS_NO_CONVERSION:	return Qno_conversion;
 #ifdef DEBUG_XEMACS
     case CODESYS_INTERNAL:	return Qinternal;
 #endif
-    default:
-      abort ();
     }
-
-  return Qnil; /* not reached */
 }
 
 #ifdef MULE
@@ -1289,6 +1383,20 @@ struct detection_state
   struct
     {
       int mask;
+      int in_byte;
+  }
+  ucs4;
+
+  struct
+    {
+      int mask;
+      int in_byte;
+    }
+  utf8;
+
+  struct
+    {
+      int mask;
       int initted;
       struct iso2022_decoder iso;
       unsigned int flags;
@@ -1405,6 +1513,8 @@ detect_coding_type (struct detection_state *st, CONST unsigned char *src,
 #ifdef MULE
 	      st->shift_jis.mask = ~0;
 	      st->big5.mask = ~0;
+	      st->ucs4.mask = ~0;
+	      st->utf8.mask = ~0;
 	      st->iso2022.mask = ~0;
 #endif
 	      break;
@@ -1421,8 +1531,14 @@ detect_coding_type (struct detection_state *st, CONST unsigned char *src,
     st->shift_jis.mask = detect_coding_sjis (st, src, n);
   if (!mask_has_at_most_one_bit_p (st->big5.mask))
     st->big5.mask = detect_coding_big5 (st, src, n);
-
-  st->mask = st->iso2022.mask | st->shift_jis.mask | st->big5.mask;
+  if (!mask_has_at_most_one_bit_p (st->utf8.mask))
+    st->utf8.mask = detect_coding_utf8 (st, src, n);
+  if (!mask_has_at_most_one_bit_p (st->ucs4.mask))
+    st->ucs4.mask = detect_coding_ucs4 (st, src, n);
+
+  st->mask
+    = st->iso2022.mask | st->shift_jis.mask | st->big5.mask
+    | st->utf8.mask | st->ucs4.mask;
 #endif
   {
     int retval = mask_has_at_most_one_bit_p (st->mask);
@@ -1452,7 +1568,7 @@ coding_system_from_mask (int mask)
 	    }
 	}
       if (NILP (retval))
-	retval = Fget_coding_system (Qno_conversion);
+	retval = Fget_coding_system (Qraw_text);
       return retval;
     }
   else
@@ -1474,7 +1590,7 @@ coding_system_from_mask (int mask)
       if (cat >= 0)
 	return coding_category_system[cat];
       else
-	return Fget_coding_system (Qno_conversion);
+	return Fget_coding_system (Qraw_text);
     }
 }
 
@@ -1504,26 +1620,65 @@ determine_real_coding_system (Lstream *stream, Lisp_Object *codesys_in_out,
   if (XCODING_SYSTEM_TYPE (*codesys_in_out) == CODESYS_AUTODETECT ||
       *eol_type_in_out == EOL_AUTODETECT)
     {
+      unsigned char random_buffer[4096];
+      int nread;
+      Lisp_Object coding_system = Qnil;
 
-      while (1)
+      nread = Lstream_read (stream, random_buffer, sizeof (random_buffer));
+      if (nread)
 	{
-	  unsigned char random_buffer[4096];
-	  int nread;
+	  unsigned char *cp = random_buffer;
 
-	  nread = Lstream_read (stream, random_buffer, sizeof (random_buffer));
-	  if (!nread)
-	    break;
-	  if (detect_coding_type (&decst, random_buffer, nread,
-				  XCODING_SYSTEM_TYPE (*codesys_in_out) !=
-				  CODESYS_AUTODETECT))
-	    break;
-	}
+	  while (cp < random_buffer + nread)
+	    {
+	      if ((*cp++ == 'c') && (cp < random_buffer + nread) &&
+		  (*cp++ == 'o') && (cp < random_buffer + nread) &&
+		  (*cp++ == 'd') && (cp < random_buffer + nread) &&
+		  (*cp++ == 'i') && (cp < random_buffer + nread) &&
+		  (*cp++ == 'n') && (cp < random_buffer + nread) &&
+		  (*cp++ == 'g') && (cp < random_buffer + nread) &&
+		  (*cp++ == ':') && (cp < random_buffer + nread))
+		{
+		  unsigned char coding_system_name[4096 - 6];
+		  unsigned char *np = coding_system_name;
 
+		  while ( (cp < random_buffer + nread)
+			  && ((*cp == ' ') || (*cp == '\t')) )
+		    {
+		      cp++;
+		    }
+		  while ( (cp < random_buffer + nread) &&
+			  (*cp != ' ') && (*cp != '\t') && (*cp != ';') )
+		    {
+		      *np++ = *cp++;
+		    }
+		  *np = 0;
+		  coding_system
+		    = Ffind_coding_system (intern (coding_system_name));
+		  break;
+		}
+	    }
+	  if (EQ(coding_system, Qnil))
+	    do{
+	      if (detect_coding_type (&decst, random_buffer, nread,
+				      XCODING_SYSTEM_TYPE (*codesys_in_out)
+				      != CODESYS_AUTODETECT))
+		break;
+	      nread = Lstream_read (stream,
+				    random_buffer, sizeof (random_buffer));
+	      if (!nread)
+		break;
+	    } while(1);
+	}
       *eol_type_in_out = decst.eol_type;
       if (XCODING_SYSTEM_TYPE (*codesys_in_out) == CODESYS_AUTODETECT)
-	*codesys_in_out = coding_system_from_mask (decst.mask);
+	{
+	  if (EQ(coding_system, Qnil))
+	    *codesys_in_out = coding_system_from_mask (decst.mask);
+	  else
+	    *codesys_in_out = coding_system;
+	}
     }
-
   /* If we absolutely can't determine the EOL type, just assume LF. */
   if (*eol_type_in_out == EOL_AUTODETECT)
     *eol_type_in_out = EOL_LF;
@@ -1646,6 +1801,62 @@ do {								\
 /* C should be a binary character in the range 0 - 255; convert
    to internal format and add to Dynarr DST. */
 
+#ifdef UTF2000
+#define DECODE_ADD_BINARY_CHAR(c, dst) \
+do {						\
+  if (BYTE_ASCII_P (c))				\
+    Dynarr_add (dst, c); /* ASCII byte is stored unchanged */			\
+  else						\
+    {						\
+      Dynarr_add (dst, (c >> 6) | 0xc0); /* lead byte: top two bits of C under 0xc0 */	\
+      Dynarr_add (dst, (c & 0x3f) | 0x80); /* trail byte: low six bits of C under 0x80 */	\
+    }						\
+} while (0)
+
+INLINE void
+DECODE_ADD_UCS_CHAR(Emchar c, unsigned_char_dynarr* dst) /* Append C to DST as a 1- to 6-byte UTF-8-style sequence. */
+{
+  if ( c <= 0x7f ) /* 1 byte.  NOTE(review): a negative C, if Emchar is signed, also lands here -- confirm callers never pass one */
+    {
+      Dynarr_add (dst, c);
+    }
+  else if ( c <= 0x7ff ) /* 2 bytes, lead 0xc0 */
+    {
+      Dynarr_add (dst, (c >> 6) | 0xc0);
+      Dynarr_add (dst, (c & 0x3f) | 0x80);
+    }
+  else if ( c <= 0xffff ) /* 3 bytes, lead 0xe0 */
+    {
+      Dynarr_add (dst,  (c >> 12) | 0xe0);
+      Dynarr_add (dst, ((c >>  6) & 0x3f) | 0x80);
+      Dynarr_add (dst,  (c        & 0x3f) | 0x80);
+    }
+  else if ( c <= 0x1fffff ) /* 4 bytes, lead 0xf0 */
+    {
+      Dynarr_add (dst,  (c >> 18) | 0xf0);
+      Dynarr_add (dst, ((c >> 12) & 0x3f) | 0x80);
+      Dynarr_add (dst, ((c >>  6) & 0x3f) | 0x80);
+      Dynarr_add (dst,  (c        & 0x3f) | 0x80);
+    }
+  else if ( c <= 0x3ffffff ) /* 5 bytes, lead 0xf8 */
+    {
+      Dynarr_add (dst,  (c >> 24) | 0xf8);
+      Dynarr_add (dst, ((c >> 18) & 0x3f) | 0x80);
+      Dynarr_add (dst, ((c >> 12) & 0x3f) | 0x80);
+      Dynarr_add (dst, ((c >>  6) & 0x3f) | 0x80);
+      Dynarr_add (dst,  (c        & 0x3f) | 0x80);
+    }
+  else /* anything larger: 6 bytes, lead 0xfc */
+    {
+      Dynarr_add (dst,  (c >> 30) | 0xfc);
+      Dynarr_add (dst, ((c >> 24) & 0x3f) | 0x80);
+      Dynarr_add (dst, ((c >> 18) & 0x3f) | 0x80);
+      Dynarr_add (dst, ((c >> 12) & 0x3f) | 0x80);
+      Dynarr_add (dst, ((c >>  6) & 0x3f) | 0x80);
+      Dynarr_add (dst,  (c        & 0x3f) | 0x80);
+    }
+}
+#else
 #define DECODE_ADD_BINARY_CHAR(c, dst)		\
 do {						\
   if (BYTE_ASCII_P (c))				\
@@ -1661,6 +1872,7 @@ do {						\
       Dynarr_add (dst, c);			\
     }						\
 } while (0)
+#endif
 
 #define DECODE_OUTPUT_PARTIAL_CHAR(ch)	\
 do {					\
@@ -1673,10 +1885,12 @@ do {					\
 
 #define DECODE_HANDLE_END_OF_CONVERSION(flags, ch, dst)	\
 do {					\
-  DECODE_OUTPUT_PARTIAL_CHAR (ch);	\
-  if ((flags & CODING_STATE_END) &&	\
-      (flags & CODING_STATE_CR))	\
-    Dynarr_add (dst, '\r');		\
+  if (flags & CODING_STATE_END)		\
+    {					\
+      DECODE_OUTPUT_PARTIAL_CHAR (ch);	\
+      if (flags & CODING_STATE_CR)	\
+	Dynarr_add (dst, '\r');		\
+    }					\
 } while (0)
 
 #define DECODING_STREAM_DATA(stream) LSTREAM_TYPE_DATA (stream, decoding)
@@ -1684,7 +1898,7 @@ do {					\
 struct decoding_stream
 {
   /* Coding system that governs the conversion. */
-  struct Lisp_Coding_System *codesys;
+  Lisp_Coding_System *codesys;
 
   /* Stream that we read the encoded data from or
      write the decoded data to. */
@@ -1718,6 +1932,9 @@ struct decoding_stream
   /* Additional information (the state of the running CCL program)
      used by the CCL decoder. */
   struct ccl_program ccl;
+
+  /* counter for UTF-8 or UCS-4 */
+  unsigned char counter;
 #endif
   struct detection_state decst;
 };
@@ -1746,7 +1963,7 @@ decoding_marker (Lisp_Object stream, void (*markobj) (Lisp_Object))
      and automatically marked. */
 
   XSETLSTREAM (str_obj, str);
-  (markobj) (str_obj);
+  markobj (str_obj);
   if (str->imp->marker)
     return (str->imp->marker) (str_obj, markobj);
   else
@@ -1852,6 +2069,7 @@ reset_decoding_stream (struct decoding_stream *str)
     {
       setup_ccl_program (&str->ccl, CODING_SYSTEM_CCL_DECODE (str->codesys));
     }
+  str->counter = 0;
 #endif /* MULE */
   str->flags = str->ch = 0;
 }
@@ -1890,9 +2108,11 @@ decoding_closer (Lstream *stream)
     }
   Dynarr_free (str->runoff);
 #ifdef MULE
+#ifdef ENABLE_COMPOSITE_CHARS
   if (str->iso2022.composite_chars)
     Dynarr_free (str->iso2022.composite_chars);
 #endif
+#endif
   return Lstream_close (str->other_end);
 }
 
@@ -1909,7 +2129,7 @@ decoding_stream_coding_system (Lstream *stream)
 void
 set_decoding_stream_coding_system (Lstream *lstr, Lisp_Object codesys)
 {
-  struct Lisp_Coding_System *cs = XCODING_SYSTEM (codesys);
+  Lisp_Coding_System *cs = XCODING_SYSTEM (codesys);
   struct decoding_stream *str = DECODING_STREAM_DATA (lstr);
   str->codesys = cs;
   if (CODING_SYSTEM_EOL_TYPE (cs) != EOL_AUTODETECT)
@@ -2026,8 +2246,15 @@ mule_decode (Lstream *decoding, CONST unsigned char *src,
     case CODESYS_BIG5:
       decode_coding_big5 (decoding, src, dst, n);
       break;
+    case CODESYS_UCS4:
+      decode_coding_ucs4 (decoding, src, dst, n);
+      break;
+    case CODESYS_UTF8:
+      decode_coding_utf8 (decoding, src, dst, n);
+      break;
     case CODESYS_CCL:
-      ccl_driver (&str->ccl, src, dst, n, 0);
+      str->ccl.last_block = str->flags & CODING_STATE_END;
+      ccl_driver (&str->ccl, src, dst, n, 0, CCL_MODE_DECODING);
       break;
     case CODESYS_ISO2022:
       decode_coding_iso2022 (decoding, src, dst, n);
@@ -2117,7 +2344,7 @@ BUFFER defaults to the current buffer if unspecified.
 struct encoding_stream
 {
   /* Coding system that governs the conversion. */
-  struct Lisp_Coding_System *codesys;
+  Lisp_Coding_System *codesys;
 
   /* Stream that we read the encoded data from or
      write the decoded data to. */
@@ -2192,7 +2419,7 @@ encoding_marker (Lisp_Object stream, void (*markobj) (Lisp_Object))
      and automatically marked. */
 
   XSETLSTREAM (str_obj, str);
-  (markobj) (str_obj);
+  markobj (str_obj);
   if (str->imp->marker)
     return (str->imp->marker) (str_obj, markobj);
   else
@@ -2305,7 +2532,11 @@ reset_encoding_stream (struct encoding_stream *str)
 	str->iso2022.register_right = 1;
 	str->iso2022.current_charset = Qnil;
 	str->iso2022.current_half = 0;
+#ifdef UTF2000
+	str->iso2022.current_char_boundary = 0;
+#else
 	str->iso2022.current_char_boundary = 1;
+#endif
 	break;
       }
     case CODESYS_CCL:
@@ -2368,7 +2599,7 @@ encoding_stream_coding_system (Lstream *stream)
 void
 set_encoding_stream_coding_system (Lstream *lstr, Lisp_Object codesys)
 {
-  struct Lisp_Coding_System *cs = XCODING_SYSTEM (codesys);
+  Lisp_Coding_System *cs = XCODING_SYSTEM (codesys);
   struct encoding_stream *str = ENCODING_STREAM_DATA (lstr);
   str->codesys = cs;
   reset_encoding_stream (str);
@@ -2432,8 +2663,15 @@ mule_encode (Lstream *encoding, CONST unsigned char *src,
     case CODESYS_BIG5:
       encode_coding_big5 (encoding, src, dst, n);
       break;
+    case CODESYS_UCS4:
+      encode_coding_ucs4 (encoding, src, dst, n);
+      break;
+    case CODESYS_UTF8:
+      encode_coding_utf8 (encoding, src, dst, n);
+      break;
     case CODESYS_CCL:
-      ccl_driver (&str->ccl, src, dst, n, 0);
+      str->ccl.last_block = str->flags & CODING_STATE_END;
+      ccl_driver (&str->ccl, src, dst, n, 0, CCL_MODE_ENCODING);
       break;
     case CODESYS_ISO2022:
       encode_coding_iso2022 (encoding, src, dst, n);
@@ -2517,9 +2755,9 @@ text.  BUFFER defaults to the current buffer if unspecified.
 
 /* Shift-JIS is a coding system encoding three character sets: ASCII, right
    half of JISX0201-Kana, and JISX0208.  An ASCII character is encoded
-   as is.  A character of JISX0201-Kana (TYPE94 character set) is
+   as is.  A character of JISX0201-Kana (DIMENSION1_CHARS94 character set) is
    encoded by "position-code + 0x80".  A character of JISX0208
-   (TYPE94x94 character set) is encoded in 2-byte but two
+   (DIMENSION2_CHARS94 character set) is encoded in 2-byte but two
    position-codes are divided and shifted so that it fit in the range
    below.
 
@@ -2576,12 +2814,10 @@ decode_coding_sjis (Lstream *decoding, CONST unsigned char *src,
 		    unsigned_char_dynarr *dst, unsigned int n)
 {
   unsigned char c;
-  unsigned int flags, ch;
-  enum eol_type eol_type;
   struct decoding_stream *str = DECODING_STREAM_DATA (decoding);
-
-  CODING_STREAM_DECOMPOSE (str, flags, ch);
-  eol_type = str->eol_type;
+  unsigned int flags  = str->flags;
+  unsigned int ch     = str->ch;
+  eol_type_t eol_type = str->eol_type;
 
   while (n--)
     {
@@ -2594,10 +2830,16 @@ decode_coding_sjis (Lstream *decoding, CONST unsigned char *src,
 	    {
 	      unsigned char e1, e2;
 
-	      Dynarr_add (dst, LEADING_BYTE_JAPANESE_JISX0208);
 	      DECODE_SJIS (ch, c, e1, e2);
+#ifdef UTF2000
+	      DECODE_ADD_UCS_CHAR(MAKE_CHAR(Vcharset_japanese_jisx0208,
+					    e1 & 0x7F,
+					    e2 & 0x7F), dst);
+#else
+	      Dynarr_add (dst, LEADING_BYTE_JAPANESE_JISX0208);
 	      Dynarr_add (dst, e1);
 	      Dynarr_add (dst, e2);
+#endif
 	    }
 	  else
 	    {
@@ -2613,8 +2855,13 @@ decode_coding_sjis (Lstream *decoding, CONST unsigned char *src,
 	    ch = c;
 	  else if (BYTE_SJIS_KATAKANA_P (c))
 	    {
+#ifdef UTF2000
+	      DECODE_ADD_UCS_CHAR(MAKE_CHAR(Vcharset_katakana_jisx0201,
+					    c & 0x7F, 0), dst);
+#else
 	      Dynarr_add (dst, LEADING_BYTE_KATAKANA_JISX0201);
 	      Dynarr_add (dst, c);
+#endif
 	    }
 	  else
 	    DECODE_ADD_BINARY_CHAR (c, dst);
@@ -2624,7 +2871,8 @@ decode_coding_sjis (Lstream *decoding, CONST unsigned char *src,
 
   DECODE_HANDLE_END_OF_CONVERSION (flags, ch, dst);
 
-  CODING_STREAM_COMPOSE (str, flags, ch);
+  str->flags = flags;
+  str->ch    = ch;
 }
 
 /* Convert internally-formatted data to Shift-JIS. */
@@ -2635,15 +2883,85 @@ encode_coding_sjis (Lstream *encoding, CONST unsigned char *src,
 {
   unsigned char c;
   struct encoding_stream *str = ENCODING_STREAM_DATA (encoding);
-  unsigned int flags, ch;
-  enum eol_type eol_type;
-
-  CODING_STREAM_DECOMPOSE (str, flags, ch);
-  eol_type = CODING_SYSTEM_EOL_TYPE (str->codesys);
+  unsigned int flags  = str->flags;
+  unsigned int ch     = str->ch;
+  eol_type_t eol_type = CODING_SYSTEM_EOL_TYPE (str->codesys);
+#ifdef UTF2000
+  unsigned char char_boundary = str->iso2022.current_char_boundary;
+#endif
 
   while (n--)
     {
       c = *src++;
+#ifdef UTF2000
+      switch (char_boundary)
+	{
+	case 0:
+	  if ( c >= 0xfc )
+	    {
+	      ch = c & 0x01;
+	      char_boundary = 5;
+	    }
+	  else if ( c >= 0xf8 )
+	    {
+	      ch = c & 0x03;
+	      char_boundary = 4;
+	    }
+	  else if ( c >= 0xf0 )
+	    {
+	      ch = c & 0x07;
+	      char_boundary = 3;
+	    }
+	  else if ( c >= 0xe0 )
+	    {
+	      ch = c & 0x0f;
+	      char_boundary = 2;
+	    }
+	  else if ( c >= 0xc0 )
+	    {
+	      ch = c & 0x1f;
+	      char_boundary = 1;
+	    }
+	  else
+	    {
+	      ch = 0;
+	      if (c == '\n')
+		{
+		  if (eol_type != EOL_LF && eol_type != EOL_AUTODETECT)
+		    Dynarr_add (dst, '\r');
+		  if (eol_type != EOL_CR)
+		    Dynarr_add (dst, c);
+		}
+	      else
+		Dynarr_add (dst, c);
+	      char_boundary = 0;
+	    }
+	  break;
+	case 1:
+	  ch = ( ch << 6 ) | ( c & 0x3f );
+	  {
+	    Lisp_Object charset;
+	    unsigned int c1, c2, s1, s2;
+	    
+	    BREAKUP_CHAR (ch, charset, c1, c2);
+	    if (EQ(charset, Vcharset_katakana_jisx0201))
+	      {
+		Dynarr_add (dst, c1 | 0x80);
+	      }
+	    else if (EQ(charset, Vcharset_japanese_jisx0208))
+	      {
+		ENCODE_SJIS (c1 | 0x80, c2 | 0x80, s1, s2);
+		Dynarr_add (dst, s1);
+		Dynarr_add (dst, s2);
+	      }
+	  }
+	  char_boundary = 0;
+	  break;
+	default:
+	  ch = ( ch << 6 ) | ( c & 0x3f );
+	  char_boundary--;
+	}
+#else
       if (c == '\n')
 	{
 	  if (eol_type != EOL_LF && eol_type != EOL_AUTODETECT)
@@ -2680,9 +2998,14 @@ encode_coding_sjis (Lstream *encoding, CONST unsigned char *src,
 	      ch = 0;
 	    }
 	}
+#endif
     }
 
-  CODING_STREAM_COMPOSE (str, flags, ch);
+  str->flags = flags;
+  str->ch    = ch;
+#ifdef UTF2000
+  str->iso2022.current_char_boundary = char_boundary;
+#endif
 }
 
 DEFUN ("decode-shift-jis-char", Fdecode_shift_jis_char, 1, 1, 0, /*
@@ -2748,8 +3071,8 @@ Return the corresponding character code in SHIFT-JIS as a cons of two bytes.
 
    Since the number of characters in Big5 is larger than maximum
    characters in Emacs' charset (96x96), it can't be handled as one
-   charset.  So, in Emacs, Big5 is devided into two: `charset-big5-1'
-   and `charset-big5-2'.  Both <type>s are TYPE94x94.  The former
+   charset.  So, in Emacs, Big5 is divided into two: `charset-big5-1'
+   and `charset-big5-2'.  Both <type>s are DIMENSION2_CHARS94.  The former
    contains frequently used characters and the latter contains less
    frequently used characters.  */
 
@@ -2865,12 +3188,10 @@ decode_coding_big5 (Lstream *decoding, CONST unsigned char *src,
 		    unsigned_char_dynarr *dst, unsigned int n)
 {
   unsigned char c;
-  unsigned int flags, ch;
-  enum eol_type eol_type;
   struct decoding_stream *str = DECODING_STREAM_DATA (decoding);
-
-  CODING_STREAM_DECOMPOSE (str, flags, ch);
-  eol_type = str->eol_type;
+  unsigned int flags  = str->flags;
+  unsigned int ch     = str->ch;
+  eol_type_t eol_type = str->eol_type;
 
   while (n--)
     {
@@ -2906,7 +3227,8 @@ decode_coding_big5 (Lstream *decoding, CONST unsigned char *src,
 
   DECODE_HANDLE_END_OF_CONVERSION (flags, ch, dst);
 
-  CODING_STREAM_COMPOSE (str, flags, ch);
+  str->flags = flags;
+  str->ch    = ch;
 }
 
 /* Convert internally-formatted data to Big5. */
@@ -2915,13 +3237,12 @@ static void
 encode_coding_big5 (Lstream *encoding, CONST unsigned char *src,
 		    unsigned_char_dynarr *dst, unsigned int n)
 {
+#ifndef UTF2000
   unsigned char c;
   struct encoding_stream *str = ENCODING_STREAM_DATA (encoding);
-  unsigned int flags, ch;
-  enum eol_type eol_type;
-
-  CODING_STREAM_DECOMPOSE (str, flags, ch);
-  eol_type = CODING_SYSTEM_EOL_TYPE (str->codesys);
+  unsigned int flags  = str->flags;
+  unsigned int ch     = str->ch;
+  eol_type_t eol_type = CODING_SYSTEM_EOL_TYPE (str->codesys);
 
   while (n--)
     {
@@ -2969,7 +3290,9 @@ encode_coding_big5 (Lstream *encoding, CONST unsigned char *src,
       ch = 0;
     }
 
-  CODING_STREAM_COMPOSE (str, flags, ch);
+  str->flags = flags;
+  str->ch    = ch;
+#endif
 }
 
 
@@ -2990,7 +3313,7 @@ Return the corresponding character.
   if (BYTE_BIG5_TWO_BYTE_1_P (b1) &&
       BYTE_BIG5_TWO_BYTE_2_P (b2))
     {
-      int leading_byte;
+      Charset_ID leading_byte;
       Lisp_Object charset;
       DECODE_BIG5 (b1, b2, leading_byte, c1, c2);
       charset = CHARSET_BY_LEADING_BYTE (leading_byte);
@@ -3024,133 +3347,856 @@ Return the corresponding character code in Big5.
 
 
 /************************************************************************/
-/*                           ISO2022 methods                            */
+/*                           UCS-4 methods                              */
+/*                                                                      */
+/*  UCS-4 character codes are implemented as nonnegative integers.      */
+/*                                                                      */
 /************************************************************************/
 
-/* The following note describes the coding system ISO2022 briefly.
-   Since the intention of this note is to help understanding of the
-   programs in this file, some parts are NOT ACCURATE or OVERLY
-   SIMPLIFIED.  For thorough understanding, please refer to the
-   original document of ISO2022.
+Lisp_Object ucs_to_mule_table[65536];
+Lisp_Object mule_to_ucs_table;
 
-   ISO2022 provides many mechanisms to encode several character sets
-   in 7-bit and 8-bit environments.  If one chooses 7-bit environment,
-   all text is encoded by codes of less than 128.  This may make the
-   encoded text a little bit longer, but the text get more stability
-   to pass through several gateways (some of them strip off MSB).
+DEFUN ("set-ucs-char", Fset_ucs_char, 2, 2, 0, /*
+Map UCS-4 code CODE to Mule character CHARACTER.
 
-   There are two kind of character sets: control character set and
-   graphic character set.  The former contains control characters such
-   as `newline' and `escape' to provide control functions (control
-   functions are provided also by escape sequence).  The latter
-   contains graphic characters such as 'A' and '-'.  Emacs recognizes
-   two control character sets and many graphic character sets.
+Return T on success, NIL on failure.
+*/
+       (code, character))
+{
+  unsigned int c;
 
-   Graphic character sets are classified into one of four types,
-   according to the dimension and number of characters in the set:
-   TYPE94, TYPE96, TYPE94x94, and TYPE96x96.  In addition, each
-   character set is assigned an identification byte, unique for each
-   type, called "final character" (denoted as <F> hereafter).  The <F>
-   of each character set is decided by ECMA(*) when it is registered
-   in ISO.  Code range of <F> is 0x30..0x7F (0x30..0x3F are for
-   private use only).
+  CHECK_CHAR (character);
+  CHECK_INT (code);
+  c = XINT (code);
 
-   Note (*): ECMA = European Computer Manufacturers Association
+  if (c < sizeof (ucs_to_mule_table) / sizeof (ucs_to_mule_table[0]))
+    {  /* C is a valid element index: the bound is the element count. */
+      ucs_to_mule_table[c] = character;
+      return Qt;
+    }
+  else
+    return Qnil;  /* CODE lies outside the table; nothing was stored. */
+}
 
-   Here are examples of graphic character set [NAME(<F>)]:
-	o TYPE94 -- ASCII('B'), right-half-of-JISX0201('I'), ...
-	o TYPE96 -- right-half-of-ISO8859-1('A'), ...
-	o TYPE94x94 -- GB2312('A'), JISX0208('B'), ...
-	o TYPE96x96 -- none for the moment
+static Lisp_Object
+ucs_to_char (unsigned long code)
+{  /* Look up the Mule character object for UCS-4 CODE; Qnil if out of range. */
+  if (code < sizeof (ucs_to_mule_table) / sizeof (ucs_to_mule_table[0]))
+    {  /* Table lookup; the bound is the element count, not the byte size. */
+      return ucs_to_mule_table[code];
+    }
+  else if ((0xe00000 <= code) && (code <= 0xe00000 + 94 * 94 * 14))
+    {  /* Codes from 0xe00000 up are split into 94x94 charset cells. */
+      unsigned int c;
+      /* NOTE(review): "<=" above admits one code past 14 full sets -- confirm. */
+      code -= 0xe00000;
+      c = code % (94 * 94);  /* cell index inside the selected charset */
+      return make_char
+	(MAKE_CHAR (CHARSET_BY_ATTRIBUTES
+		    (CHARSET_TYPE_94X94, code / (94 * 94) + '@',
+		     CHARSET_LEFT_TO_RIGHT),
+		    c / 94 + 33, c % 94 + 33));
+    }
+  else
+    return Qnil;
+}
 
-   A code area (1byte=8bits) is divided into 4 areas, C0, GL, C1, and GR.
-	C0 [0x00..0x1F] -- control character plane 0
-	GL [0x20..0x7F] -- graphic character plane 0
-	C1 [0x80..0x9F] -- control character plane 1
-	GR [0xA0..0xFF] -- graphic character plane 1
+DEFUN ("ucs-char", Fucs_char, 1, 1, 0, /*
+Return Mule character corresponding to UCS code CODE (a positive integer).
+*/
+       (code))
+{
+  CHECK_NATNUM (code);
+  return ucs_to_char (XINT (code));
+}
 
-   A control character set is directly designated and invoked to C0 or
-   C1 by an escape sequence.  The most common case is that:
-   - ISO646's  control character set is designated/invoked to C0, and
-   - ISO6429's control character set is designated/invoked to C1,
-   and usually these designations/invocations are omitted in encoded
-   text.  In a 7-bit environment, only C0 can be used, and a control
-   character for C1 is encoded by an appropriate escape sequence to
-   fit into the environment.  All control characters for C1 are
-   defined to have corresponding escape sequences.
+DEFUN ("set-char-ucs", Fset_char_ucs, 2, 2, 0, /*
+Map Mule character CHARACTER to UCS code CODE (a positive integer).
+*/
+       (character, code))
+{
+  /* #### Isn't this gilding the lily?  Fput_char_table checks its args.
+          Fset_char_ucs is more restrictive on index arg, but should
+          check code arg in a char_table method. */
+  CHECK_CHAR (character);
+  CHECK_NATNUM (code);
+  return Fput_char_table (character, code, mule_to_ucs_table);
+}
 
-   A graphic character set is at first designated to one of four
-   graphic registers (G0 through G3), then these graphic registers are
-   invoked to GL or GR.  These designations and invocations can be
-   done independently.  The most common case is that G0 is invoked to
-   GL, G1 is invoked to GR, and ASCII is designated to G0.  Usually
-   these invocations and designations are omitted in encoded text.
-   In a 7-bit environment, only GL can be used.
+DEFUN ("char-ucs", Fchar_ucs, 1, 1, 0, /*
+Return the UCS code (a positive integer) corresponding to CHARACTER.
+*/
+       (character))
+{
+  return Fget_char_table (character, mule_to_ucs_table);
+}
 
-   When a graphic character set of TYPE94 or TYPE94x94 is invoked to
-   GL, codes 0x20 and 0x7F of the GL area work as control characters
-   SPACE and DEL respectively, and code 0xA0 and 0xFF of GR area
-   should not be used.
+#ifdef UTF2000
+#define decode_ucs4 DECODE_ADD_UCS_CHAR
+#else
+/* Decode a UCS-4 character into a buffer.  If the lookup fails, use
+   <GETA MARK> (U+3013) of JIS X 0208, which means correct character
+   is not found, instead.
+   #### do something more appropriate (use blob?)
+        Danger, Will Robinson!  Data loss.  Should we signal user? */
+static void
+decode_ucs4 (unsigned long ch, unsigned_char_dynarr *dst)
+{
+  Lisp_Object chr = ucs_to_char (ch);
 
-   There are two ways of invocation: locking-shift and single-shift.
-   With locking-shift, the invocation lasts until the next different
-   invocation, whereas with single-shift, the invocation works only
-   for the following character and doesn't affect locking-shift.
-   Invocations are done by the following control characters or escape
-   sequences.
+  if (! NILP (chr))
+    {
+      Bufbyte work[MAX_EMCHAR_LEN];
+      int len;
+
+      ch = XCHAR (chr);
+      len = (ch < 128) ?
+	simple_set_charptr_emchar (work, ch) :
+	non_ascii_set_charptr_emchar (work, ch);
+      Dynarr_add_many (dst, work, len);
+    }
+  else
+    {
+      Dynarr_add (dst, LEADING_BYTE_JAPANESE_JISX0208);
+      Dynarr_add (dst, 34 + 128);
+      Dynarr_add (dst, 46 + 128);
+    }
+}
+#endif
 
-   ----------------------------------------------------------------------
-   abbrev  function	             cntrl escape seq	description
-   ----------------------------------------------------------------------
-   SI/LS0  (shift-in)		     0x0F  none		invoke G0 into GL
-   SO/LS1  (shift-out)		     0x0E  none		invoke G1 into GL
-   LS1R    (locking-shift-1 right)   none  ESC '~'      invoke G1 into GR
-   LS2     (locking-shift-2)	     none  ESC 'n'	invoke G2 into GL
-   LS2R    (locking-shift-2 right)   none  ESC '}'      invoke G2 into GR
-   LS3     (locking-shift-3)	     none  ESC 'o'	invoke G3 into GL
-   LS3R    (locking-shift 3 right)   none  ESC '|'      invoke G3 into GR
-   SS2     (single-shift-2)	     0x8E  ESC 'N'	invoke G2 for one char
-   SS3     (single-shift-3)	     0x8F  ESC 'O'	invoke G3 for one char
-   ----------------------------------------------------------------------
-   The first four are for locking-shift.  Control characters for these
-   functions are defined by macros ISO_CODE_XXX in `coding.h'.
+static unsigned long
+mule_char_to_ucs4 (Lisp_Object charset,
+		   unsigned char h, unsigned char l)
+{
+  Lisp_Object code
+    = Fget_char_table (make_char (MAKE_CHAR (charset, h & 127, l & 127)),
+		       mule_to_ucs_table);
 
-   Designations are done by the following escape sequences.
-   ----------------------------------------------------------------------
-   escape sequence	description
-   ----------------------------------------------------------------------
-   ESC '(' <F>		designate TYPE94<F> to G0
-   ESC ')' <F>		designate TYPE94<F> to G1
-   ESC '*' <F>		designate TYPE94<F> to G2
-   ESC '+' <F>		designate TYPE94<F> to G3
-   ESC ',' <F>		designate TYPE96<F> to G0 (*)
-   ESC '-' <F>		designate TYPE96<F> to G1
-   ESC '.' <F>		designate TYPE96<F> to G2
-   ESC '/' <F>		designate TYPE96<F> to G3
-   ESC '$' '(' <F>	designate TYPE94x94<F> to G0 (**)
-   ESC '$' ')' <F>	designate TYPE94x94<F> to G1
-   ESC '$' '*' <F>	designate TYPE94x94<F> to G2
-   ESC '$' '+' <F>	designate TYPE94x94<F> to G3
-   ESC '$' ',' <F>	designate TYPE96x96<F> to G0 (*)
-   ESC '$' '-' <F>	designate TYPE96x96<F> to G1
-   ESC '$' '.' <F>	designate TYPE96x96<F> to G2
-   ESC '$' '/' <F>	designate TYPE96x96<F> to G3
-   ----------------------------------------------------------------------
-   In this list, "TYPE94<F>" means a graphic character set of type TYPE94
-   and final character <F>, and etc.
+  if (INTP (code))
+    {
+      return XINT (code);
+    }
+  else if ( (XCHARSET_DIMENSION (charset) == 2) &&
+	    (XCHARSET_CHARS (charset) == 94) )
+    {
+      unsigned char final = XCHARSET_FINAL (charset);
 
-   Note (*): Although these designations are not allowed in ISO2022,
-   Emacs accepts them on decoding, and produces them on encoding
-   TYPE96 or TYPE96x96 character set in a coding system which is
-   characterized as 7-bit environment, non-locking-shift, and
-   non-single-shift.
+      if ( ('@' <= final) && (final < 0x7f) )
+	{
+	  return 0xe00000 + (final - '@') * 94 * 94
+	    + ((h & 127) - 33) * 94 + (l & 127) - 33;
+	}
+      else
+	{
+	  return '?';
+	}
+    }
+  else
+    {
+      return '?';
+    }
+}
 
-   Note (**): If <F> is '@', 'A', or 'B', the intermediate character
-   '(' can be omitted.  We call this as "short-form" here after.
+static void
+encode_ucs4 (Lisp_Object charset,
+	     unsigned char h, unsigned char l, unsigned_char_dynarr *dst)
+{  /* Append the UCS-4 code of (CHARSET, H, L) to DST as four bytes. */
+  unsigned long code = mule_char_to_ucs4 (charset, h, l);
+  Dynarr_add (dst,  code >> 24);  /* most significant byte is added first */
+  Dynarr_add (dst, (code >> 16) & 255);
+  Dynarr_add (dst, (code >>  8) & 255);
+  Dynarr_add (dst,  code        & 255);  /* least significant byte last */
+}
 
-   Now you may notice that there are a lot of ways for encoding the
+static int
+detect_coding_ucs4 (struct detection_state *st, CONST unsigned char *src,
+		    unsigned int n)
+{  /* Scan N bytes at SRC; return the UCS-4 category mask, or 0 to rule it out. */
+  while (n--)
+    {
+      int c = *src++;
+      switch (st->ucs4.in_byte)  /* position inside the current 4-byte group */
+	{
+	case 0:
+	  if (c >= 128)  /* first byte of a group with its high bit set */
+	    return 0;
+	  else
+	    st->ucs4.in_byte++;
+	  break;
+	case 3:  /* fourth byte: the next byte starts a new group */
+	  st->ucs4.in_byte = 0;
+	  break;
+	default:  /* second and third bytes are accepted unchecked */
+	  st->ucs4.in_byte++;
+	}
+    }
+  return CODING_CATEGORY_UCS4_MASK;
+}
+
+static void
+decode_coding_ucs4 (Lstream *decoding, CONST unsigned char *src,
+		    unsigned_char_dynarr *dst, unsigned int n)
+{  /* Decode N bytes at SRC as big-endian 4-byte UCS-4 units into DST. */
+  struct decoding_stream *str = DECODING_STREAM_DATA (decoding);
+  unsigned int flags = str->flags;
+  unsigned int ch    = str->ch;  /* bytes of the unit accumulated so far */
+  unsigned char counter = str->counter;  /* bytes still missing from the unit */
+
+  while (n--)
+    {
+      unsigned char c = *src++;
+      switch (counter)
+	{
+	case 0:  /* first byte of a new unit; three more are expected */
+	  ch = c;
+	  counter = 3;
+	  break;
+	case 1:  /* last byte: the unit is complete, so emit it */
+	  decode_ucs4 ( ( ch << 8 ) | c, dst);
+	  ch = 0;
+	  counter = 0;
+	  break;
+	default:  /* middle byte: shift it into the accumulator */
+	  ch = ( ch << 8 ) | c;
+	  counter--;
+	}
+    }
+  if (flags & CODING_STATE_END)  /* test the stream flags, not the byte counter */
+    DECODE_OUTPUT_PARTIAL_CHAR (ch);
+
+  str->flags = flags;  /* save state so the next call can resume mid-unit */
+  str->ch    = ch;
+  str->counter = counter;
+}
+
+static void
+encode_coding_ucs4 (Lstream *encoding, CONST unsigned char *src,
+		    unsigned_char_dynarr *dst, unsigned int n)
+{
+#ifndef UTF2000
+  struct encoding_stream *str = ENCODING_STREAM_DATA (encoding);
+  unsigned int flags = str->flags;
+  unsigned int ch = str->ch;
+  unsigned char char_boundary = str->iso2022.current_char_boundary;
+  Lisp_Object charset = str->iso2022.current_charset;
+
+#ifdef ENABLE_COMPOSITE_CHARS
+  /* flags for handling composite chars.  We do a little switcharoo
+     on the source while we're outputting the composite char. */
+  unsigned int saved_n = 0;
+  CONST unsigned char *saved_src = NULL;
+  int in_composite = 0;
+
+ back_to_square_n:
+#endif
+
+  while (n--)
+    {
+      unsigned char c = *src++;
+
+      if (BYTE_ASCII_P (c))
+	{		/* Processing ASCII character */
+	  ch = 0;
+	  encode_ucs4 (Vcharset_ascii, c, 0, dst);
+	  char_boundary = 1;
+	}
+      else if (BUFBYTE_LEADING_BYTE_P (c) || BUFBYTE_LEADING_BYTE_P (ch))
+	{ /* Processing Leading Byte */
+	  ch = 0;
+	  charset = CHARSET_BY_LEADING_BYTE (c);
+	  if (LEADING_BYTE_PREFIX_P(c))
+	    ch = c;
+	  char_boundary = 0;
+	}
+      else
+	{			/* Processing Non-ASCII character */
+	  char_boundary = 1;
+	  if (EQ (charset, Vcharset_control_1))
+	    {
+	      encode_ucs4 (Vcharset_control_1, c, 0, dst);
+	    }
+	  else
+	    {
+	      switch (XCHARSET_REP_BYTES (charset))
+		{
+		case 2:
+		  encode_ucs4 (charset, c, 0, dst);
+		  break;
+		case 3:
+		  if (XCHARSET_PRIVATE_P (charset))
+		    {
+		      encode_ucs4 (charset, c, 0, dst);
+		      ch = 0;
+		    }
+		  else if (ch)
+		    {
+#ifdef ENABLE_COMPOSITE_CHARS
+		      if (EQ (charset, Vcharset_composite))
+			{
+			  if (in_composite)
+			    {
+			      /* #### Bother! We don't know how to
+				 handle this yet. */
+			      Dynarr_add (dst, 0);
+			      Dynarr_add (dst, 0);
+			      Dynarr_add (dst, 0);
+			      Dynarr_add (dst, '~');
+			    }
+			  else
+			    {
+			      Emchar emch = MAKE_CHAR (Vcharset_composite,
+						       ch & 0x7F, c & 0x7F);
+			      Lisp_Object lstr = composite_char_string (emch);
+			      saved_n = n;
+			      saved_src = src;
+			      in_composite = 1;
+			      src = XSTRING_DATA   (lstr);
+			      n   = XSTRING_LENGTH (lstr);
+			    }
+			}
+		      else
+#endif /* ENABLE_COMPOSITE_CHARS */
+			{
+			  encode_ucs4(charset, ch, c, dst);
+			}
+		      ch = 0;
+		    }
+		  else
+		    {
+		      ch = c;
+		      char_boundary = 0;
+		    }
+		  break;
+		case 4:
+		  if (ch)
+		    {
+		      encode_ucs4 (charset, ch, c, dst);
+		      ch = 0;
+		    }
+		  else
+		    {
+		      ch = c;
+		      char_boundary = 0;
+		    }
+		  break;
+		default:
+		  abort ();
+		}
+	    }
+	}
+    }
+
+#ifdef ENABLE_COMPOSITE_CHARS
+  if (in_composite)
+    {
+      n = saved_n;
+      src = saved_src;
+      in_composite = 0;
+      goto back_to_square_n; /* Wheeeeeeeee ..... */
+    }
+#endif /* ENABLE_COMPOSITE_CHARS */
+
+  str->flags = flags;
+  str->ch = ch;
+  str->iso2022.current_char_boundary = char_boundary;
+  str->iso2022.current_charset = charset;
+
+  /* Verbum caro factum est! */
+#endif
+}
+
+
+/************************************************************************/
+/*                           UTF-8 methods                              */
+/************************************************************************/
+
+static int
+detect_coding_utf8 (struct detection_state *st, CONST unsigned char *src,
+		    unsigned int n)
+{  /* Scan N bytes at SRC; return the UTF-8 category mask, or 0 to rule it out. */
+  while (n--)
+    {
+      unsigned char c = *src++;
+      switch (st->utf8.in_byte)  /* continuation bytes still expected */
+	{
+	case 0:  /* at a character boundary: classify the lead byte */
+	  if (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO)
+	    return 0;  /* these control codes are treated as ruling out UTF-8 */
+	  else if (c >= 0xfc)
+	    st->utf8.in_byte = 5;
+	  else if (c >= 0xf8)
+	    st->utf8.in_byte = 4;
+	  else if (c >= 0xf0)
+	    st->utf8.in_byte = 3;
+	  else if (c >= 0xe0)
+	    st->utf8.in_byte = 2;
+	  else if (c >= 0xc0)
+	    st->utf8.in_byte = 1;
+	  else if (c >= 0x80)  /* 0x80..0xbf cannot start a character */
+	    return 0;
+	  break;
+	default:  /* inside a sequence: the byte must look like 10xxxxxx */
+	  if ((c & 0xc0) != 0x80)
+	    return 0;
+	  else
+	    st->utf8.in_byte--;
+	}
+    }
+  return CODING_CATEGORY_UTF8_MASK;
+}
+
+static void
+decode_coding_utf8 (Lstream *decoding, CONST unsigned char *src,
+		    unsigned_char_dynarr *dst, unsigned int n)
+{  /* Decode N bytes of UTF-8 at SRC, passing each code to decode_ucs4. */
+  struct decoding_stream *str = DECODING_STREAM_DATA (decoding);
+  unsigned int flags  = str->flags;
+  unsigned int ch     = str->ch;  /* code bits accumulated so far */
+  eol_type_t eol_type = str->eol_type;
+  unsigned char counter = str->counter;  /* continuation bytes still expected */
+
+  while (n--)
+    {
+      unsigned char c = *src++;
+      switch (counter)
+	{
+	case 0:  /* at a character boundary: classify the lead byte */
+	  if ( c >= 0xfc )
+	    {
+	      ch = c & 0x01;  /* keep only the payload bits of the lead byte */
+	      counter = 5;
+	    }
+	  else if ( c >= 0xf8 )
+	    {
+	      ch = c & 0x03;
+	      counter = 4;
+	    }
+	  else if ( c >= 0xf0 )
+	    {
+	      ch = c & 0x07;
+	      counter = 3;
+	    }
+	  else if ( c >= 0xe0 )
+	    {
+	      ch = c & 0x0f;
+	      counter = 2;
+	    }
+	  else if ( c >= 0xc0 )
+	    {
+	      ch = c & 0x1f;
+	      counter = 1;
+	    }
+	  else
+	    {  /* any byte below 0xc0 is passed through as a one-byte code */
+	      DECODE_HANDLE_EOL_TYPE (eol_type, c, flags, dst);
+	      decode_ucs4 (c, dst);
+	    }
+	  break;
+	case 1:  /* last continuation byte: the code is complete, emit it */
+	  ch = ( ch << 6 ) | ( c & 0x3f );
+	  decode_ucs4 (ch, dst);
+	  ch = 0;
+	  counter = 0;
+	  break;
+	default:  /* middle continuation byte: add six more payload bits */
+	  ch = ( ch << 6 ) | ( c & 0x3f );
+	  counter--;
+	}
+    label_continue_loop:;  /* presumably a jump target of DECODE_HANDLE_EOL_TYPE -- confirm */
+    }
+
+  if (flags & CODING_STATE_END)
+    DECODE_OUTPUT_PARTIAL_CHAR (ch);
+
+  str->flags = flags;  /* save state so the next call can resume mid-sequence */
+  str->ch    = ch;
+  str->counter = counter;
+}
+
+#ifndef UTF2000
+static void
+encode_utf8 (Lisp_Object charset,
+	     unsigned char h, unsigned char l, unsigned_char_dynarr *dst)
+{  /* Append the UTF-8 form of the UCS-4 code of (CHARSET, H, L) to DST. */
+  unsigned long code = mule_char_to_ucs4 (charset, h, l);
+  if ( code <= 0x7f )  /* one byte, emitted unchanged */
+    {
+      Dynarr_add (dst, code);
+    }
+  else if ( code <= 0x7ff )  /* two bytes: 0xc0 lead + one continuation */
+    {
+      Dynarr_add (dst, (code >> 6) | 0xc0);
+      Dynarr_add (dst, (code & 0x3f) | 0x80);
+    }
+  else if ( code <= 0xffff )  /* three bytes: 0xe0 lead */
+    {
+      Dynarr_add (dst,  (code >> 12) | 0xe0);
+      Dynarr_add (dst, ((code >>  6) & 0x3f) | 0x80);
+      Dynarr_add (dst,  (code        & 0x3f) | 0x80);
+    }
+  else if ( code <= 0x1fffff )  /* four bytes: 0xf0 lead */
+    {
+      Dynarr_add (dst,  (code >> 18) | 0xf0);
+      Dynarr_add (dst, ((code >> 12) & 0x3f) | 0x80);
+      Dynarr_add (dst, ((code >>  6) & 0x3f) | 0x80);
+      Dynarr_add (dst,  (code        & 0x3f) | 0x80);
+    }
+  else if ( code <= 0x3ffffff )  /* five bytes: 0xf8 lead */
+    {
+      Dynarr_add (dst,  (code >> 24) | 0xf8);
+      Dynarr_add (dst, ((code >> 18) & 0x3f) | 0x80);
+      Dynarr_add (dst, ((code >> 12) & 0x3f) | 0x80);
+      Dynarr_add (dst, ((code >>  6) & 0x3f) | 0x80);
+      Dynarr_add (dst,  (code        & 0x3f) | 0x80);
+    }
+  else  /* everything larger gets six bytes: 0xfc lead */
+    {
+      Dynarr_add (dst,  (code >> 30) | 0xfc);
+      Dynarr_add (dst, ((code >> 24) & 0x3f) | 0x80);
+      Dynarr_add (dst, ((code >> 18) & 0x3f) | 0x80);
+      Dynarr_add (dst, ((code >> 12) & 0x3f) | 0x80);
+      Dynarr_add (dst, ((code >>  6) & 0x3f) | 0x80);
+      Dynarr_add (dst,  (code        & 0x3f) | 0x80);
+    }
+}
+#endif
+
+static void
+encode_coding_utf8 (Lstream *encoding, CONST unsigned char *src,
+		    unsigned_char_dynarr *dst, unsigned int n)
+{
+  struct encoding_stream *str = ENCODING_STREAM_DATA (encoding);
+  unsigned int flags  = str->flags;
+  unsigned int ch     = str->ch;
+  eol_type_t eol_type = CODING_SYSTEM_EOL_TYPE (str->codesys);
+  unsigned char char_boundary = str->iso2022.current_char_boundary;
+#ifdef UTF2000
+
+  while (n--)
+    {
+      unsigned char c = *src++;	  
+      switch (char_boundary)
+	{
+	case 0:
+	  if ( c >= 0xfc )
+	    {
+	      Dynarr_add (dst, c);
+	      char_boundary = 5;
+	    }
+	  else if ( c >= 0xf8 )
+	    {
+	      Dynarr_add (dst, c);
+	      char_boundary = 4;
+	    }
+	  else if ( c >= 0xf0 )
+	    {
+	      Dynarr_add (dst, c);
+	      char_boundary = 3;
+	    }
+	  else if ( c >= 0xe0 )
+	    {
+	      Dynarr_add (dst, c);
+	      char_boundary = 2;
+	    }
+	  else if ( c >= 0xc0 )
+	    {
+	      Dynarr_add (dst, c);
+	      char_boundary = 1;
+	    }
+	  else
+	    {
+	      if (c == '\n')
+		{
+		  if (eol_type != EOL_LF && eol_type != EOL_AUTODETECT)
+		    Dynarr_add (dst, '\r');
+		  if (eol_type != EOL_CR)
+		    Dynarr_add (dst, c);
+		}
+	      else
+		Dynarr_add (dst, c);
+	      char_boundary = 0;
+	    }
+	  break;
+	case 1:
+	  Dynarr_add (dst, c);
+	  char_boundary = 0;
+	  break;
+	default:
+	  Dynarr_add (dst, c);
+	  char_boundary--;
+	}
+    }
+#else /* not UTF2000 */
+  Lisp_Object charset = str->iso2022.current_charset;
+
+#ifdef ENABLE_COMPOSITE_CHARS
+  /* flags for handling composite chars.  We do a little switcharoo
+     on the source while we're outputting the composite char. */
+  unsigned int saved_n = 0;
+  CONST unsigned char *saved_src = NULL;
+  int in_composite = 0;
+
+ back_to_square_n:
+#endif /* ENABLE_COMPOSITE_CHARS */
+  
+  while (n--)
+    {
+      unsigned char c = *src++;
+
+      if (BYTE_ASCII_P (c))
+	{		/* Processing ASCII character */
+	  ch = 0;
+	  if (c == '\n')
+	    {
+	      if (eol_type != EOL_LF && eol_type != EOL_AUTODETECT)
+		Dynarr_add (dst, '\r');
+	      if (eol_type != EOL_CR)
+		Dynarr_add (dst, c);
+	    }
+	  else
+	    encode_utf8 (Vcharset_ascii, c, 0, dst);
+	  char_boundary = 1;
+	}
+      else if (BUFBYTE_LEADING_BYTE_P (c) || BUFBYTE_LEADING_BYTE_P (ch))
+	{ /* Processing Leading Byte */
+	  ch = 0;
+	  charset = CHARSET_BY_LEADING_BYTE (c);
+	  if (LEADING_BYTE_PREFIX_P(c))
+	    ch = c;
+	  char_boundary = 0;
+	}
+      else
+	{			/* Processing Non-ASCII character */
+	  char_boundary = 1;
+	  if (EQ (charset, Vcharset_control_1))
+	    {
+	      encode_utf8 (Vcharset_control_1, c, 0, dst);
+	    }
+	  else
+	    {
+	      switch (XCHARSET_REP_BYTES (charset))
+		{
+		case 2:
+		  encode_utf8 (charset, c, 0, dst);
+		  break;
+		case 3:
+		  if (XCHARSET_PRIVATE_P (charset))
+		    {
+		      encode_utf8 (charset, c, 0, dst);
+		      ch = 0;
+		    }
+		  else if (ch)
+		    {
+#ifdef ENABLE_COMPOSITE_CHARS
+		      if (EQ (charset, Vcharset_composite))
+			{
+			  if (in_composite)
+			    {
+			      /* #### Bother! We don't know how to
+				 handle this yet. */
+			      encode_utf8 (Vcharset_ascii, '~', 0, dst);
+			    }
+			  else
+			    {
+			      Emchar emch = MAKE_CHAR (Vcharset_composite,
+						       ch & 0x7F, c & 0x7F);
+			      Lisp_Object lstr = composite_char_string (emch);
+			      saved_n = n;
+			      saved_src = src;
+			      in_composite = 1;
+			      src = XSTRING_DATA   (lstr);
+			      n   = XSTRING_LENGTH (lstr);
+			    }
+			}
+		      else
+#endif /* ENABLE_COMPOSITE_CHARS */
+			{
+			  encode_utf8 (charset, ch, c, dst);
+			}
+		      ch = 0;
+		    }
+		  else
+		    {
+		      ch = c;
+		      char_boundary = 0;
+		    }
+		  break;
+		case 4:
+		  if (ch)
+		    {
+		      encode_utf8 (charset, ch, c, dst);
+		      ch = 0;
+		    }
+		  else
+		    {
+		      ch = c;
+		      char_boundary = 0;
+		    }
+		  break;
+		default:
+		  abort ();
+		}
+	    }
+	}
+    }
+
+#ifdef ENABLE_COMPOSITE_CHARS
+  if (in_composite)
+    {
+      n = saved_n;
+      src = saved_src;
+      in_composite = 0;
+      goto back_to_square_n; /* Wheeeeeeeee ..... */
+    }
+#endif
+
+#endif /* not UTF2000 */
+  str->flags = flags;
+  str->ch    = ch;
+  str->iso2022.current_char_boundary = char_boundary;
+#ifndef UTF2000
+  str->iso2022.current_charset = charset;
+#endif
+
+  /* Verbum caro factum est! */
+}
+
+
+/************************************************************************/
+/*                           ISO2022 methods                            */
+/************************************************************************/
+
+/* The following note describes the coding system ISO2022 briefly.
+   Since the intention of this note is to help understand the
+   functions in this file, some parts are NOT ACCURATE or OVERLY
+   SIMPLIFIED.  For thorough understanding, please refer to the
+   original document of ISO2022.
+
+   ISO2022 provides many mechanisms to encode several character sets
+   in 7-bit and 8-bit environments.  For 7-bit environments, all text
+   is encoded using bytes less than 128.  This may make the encoded
+   text a little bit longer, but the text passes more easily through
+   several gateways, some of which strip off MSB (Most Significant Bit).
+
+   There are two kinds of character sets: control character set and
+   graphic character set.  The former contains control characters such
+   as `newline' and `escape' to provide control functions (control
+   functions are also provided by escape sequences).  The latter
+   contains graphic characters such as 'A' and '-'.  Emacs recognizes
+   two control character sets and many graphic character sets.
+
+   Graphic character sets are classified into one of the following
+   four classes, according to the number of bytes (DIMENSION) and
+   number of characters in one dimension (CHARS) of the set:
+   - DIMENSION1_CHARS94
+   - DIMENSION1_CHARS96
+   - DIMENSION2_CHARS94
+   - DIMENSION2_CHARS96
+
+   In addition, each character set is assigned an identification tag,
+   unique for each set, called "final character" (denoted as <F>
+   hereafter).  The <F> of each character set is decided by ECMA(*)
+   when it is registered in ISO.  The code range of <F> is 0x30..0x7F
+   (0x30..0x3F are for private use only).
+
+   Note (*): ECMA = European Computer Manufacturers Association
+
+   Here are examples of graphic character set [NAME(<F>)]:
+	o DIMENSION1_CHARS94 -- ASCII('B'), right-half-of-JISX0201('I'), ...
+	o DIMENSION1_CHARS96 -- right-half-of-ISO8859-1('A'), ...
+	o DIMENSION2_CHARS94 -- GB2312('A'), JISX0208('B'), ...
+	o DIMENSION2_CHARS96 -- none for the moment
+
+   A code area (1 byte = 8 bits) is divided into 4 areas, C0, GL, C1, and GR.
+	C0 [0x00..0x1F] -- control character plane 0
+	GL [0x20..0x7F] -- graphic character plane 0
+	C1 [0x80..0x9F] -- control character plane 1
+	GR [0xA0..0xFF] -- graphic character plane 1
+
+   A control character set is directly designated and invoked to C0 or
+   C1 by an escape sequence.  The most common case is that:
+   - ISO646's  control character set is designated/invoked to C0, and
+   - ISO6429's control character set is designated/invoked to C1,
+   and usually these designations/invocations are omitted in encoded
+   text.  In a 7-bit environment, only C0 can be used, and a control
+   character for C1 is encoded by an appropriate escape sequence to
+   fit into the environment.  All control characters for C1 are
+   defined to have corresponding escape sequences.
+
+   A graphic character set is at first designated to one of four
+   graphic registers (G0 through G3), then these graphic registers are
+   invoked to GL or GR.  These designations and invocations can be
+   done independently.  The most common case is that G0 is invoked to
+   GL, G1 is invoked to GR, and ASCII is designated to G0.  Usually
+   these invocations and designations are omitted in encoded text.
+   In a 7-bit environment, only GL can be used.
+
+   When a graphic character set of CHARS94 is invoked to GL, codes
+   0x20 and 0x7F of the GL area work as control characters SPACE and
+   DEL respectively, and codes 0xA0 and 0xFF of the GR area should not
+   be used.
+
+   There are two ways of invocation: locking-shift and single-shift.
+   With locking-shift, the invocation lasts until the next different
+   invocation, whereas with single-shift, the invocation affects the
+   following character only and doesn't affect the locking-shift
+   state.  Invocations are done by the following control characters or
+   escape sequences:
+
+   ----------------------------------------------------------------------
+   abbrev  function	             cntrl escape seq	description
+   ----------------------------------------------------------------------
+   SI/LS0  (shift-in)		     0x0F  none		invoke G0 into GL
+   SO/LS1  (shift-out)		     0x0E  none		invoke G1 into GL
+   LS2     (locking-shift-2)	     none  ESC 'n'	invoke G2 into GL
+   LS3     (locking-shift-3)	     none  ESC 'o'	invoke G3 into GL
+   LS1R    (locking-shift-1 right)   none  ESC '~'      invoke G1 into GR (*)
+   LS2R    (locking-shift-2 right)   none  ESC '}'      invoke G2 into GR (*)
+   LS3R    (locking-shift 3 right)   none  ESC '|'      invoke G3 into GR (*)
+   SS2     (single-shift-2)	     0x8E  ESC 'N'	invoke G2 for one char
+   SS3     (single-shift-3)	     0x8F  ESC 'O'	invoke G3 for one char
+   ----------------------------------------------------------------------
+   (*) These are not used by any known coding system.
+
+   Control characters for these functions are defined by macros
+   ISO_CODE_XXX in `coding.h'.
+
+   Designations are done by the following escape sequences:
+   ----------------------------------------------------------------------
+   escape sequence	description
+   ----------------------------------------------------------------------
+   ESC '(' <F>		designate DIMENSION1_CHARS94<F> to G0
+   ESC ')' <F>		designate DIMENSION1_CHARS94<F> to G1
+   ESC '*' <F>		designate DIMENSION1_CHARS94<F> to G2
+   ESC '+' <F>		designate DIMENSION1_CHARS94<F> to G3
+   ESC ',' <F>		designate DIMENSION1_CHARS96<F> to G0 (*)
+   ESC '-' <F>		designate DIMENSION1_CHARS96<F> to G1
+   ESC '.' <F>		designate DIMENSION1_CHARS96<F> to G2
+   ESC '/' <F>		designate DIMENSION1_CHARS96<F> to G3
+   ESC '$' '(' <F>	designate DIMENSION2_CHARS94<F> to G0 (**)
+   ESC '$' ')' <F>	designate DIMENSION2_CHARS94<F> to G1
+   ESC '$' '*' <F>	designate DIMENSION2_CHARS94<F> to G2
+   ESC '$' '+' <F>	designate DIMENSION2_CHARS94<F> to G3
+   ESC '$' ',' <F>	designate DIMENSION2_CHARS96<F> to G0 (*)
+   ESC '$' '-' <F>	designate DIMENSION2_CHARS96<F> to G1
+   ESC '$' '.' <F>	designate DIMENSION2_CHARS96<F> to G2
+   ESC '$' '/' <F>	designate DIMENSION2_CHARS96<F> to G3
+   ----------------------------------------------------------------------
+
+   In this list, "DIMENSION1_CHARS94<F>" means a graphic character set
+   of dimension 1, chars 94, and final character <F>, etc...
+
+   Note (*): Although these designations are not allowed in ISO2022,
+   Emacs accepts them on decoding, and produces them on encoding
+   CHARS96 character sets in a coding system which is characterized as
+   7-bit environment, non-locking-shift, and non-single-shift.
+
+   Note (**): If <F> is '@', 'A', or 'B', the intermediate character
+   '(' can be omitted.  We refer to this as "short-form" hereafter.
+
+   Now you may notice that there are a lot of ways for encoding the
    same multilingual text in ISO2022.  Actually, there exist many
-   coding systems such as Compound Text (used in X's inter client
+   coding systems such as Compound Text (used in X11's inter client
    communication, ISO-2022-JP (used in Japanese internet), ISO-2022-KR
    (used in Korean internet), EUC (Extended UNIX Code, used in Asian
    localized platforms), and all of these are variants of ISO2022.
@@ -3159,19 +4205,19 @@ Return the corresponding character code in Big5.
    sequences: ISO6429's direction specification and Emacs' private
    sequence for specifying character composition.
 
-   ISO6429's direction specification takes the following format:
+   ISO6429's direction specification takes the following form:
 	o CSI ']'      -- end of the current direction
 	o CSI '0' ']'  -- end of the current direction
 	o CSI '1' ']'  -- start of left-to-right text
 	o CSI '2' ']'  -- start of right-to-left text
    The control character CSI (0x9B: control sequence introducer) is
-   abbreviated to the escape sequence ESC '[' in 7-bit environment.
+   abbreviated to the escape sequence ESC '[' in a 7-bit environment.
 
-   Character composition specification takes the following format:
+   Character composition specification takes the following form:
 	o ESC '0' -- start character composition
 	o ESC '1' -- end character composition
-   Since these are not standard escape sequences of any ISO, the use
-   of them for these meanings is restricted to Emacs only.  */
+   Since these are not standard escape sequences of any ISO standard,
+   their use with these meanings is restricted to Emacs only.  */
 
 static void
 reset_iso2022 (Lisp_Object coding_system, struct iso2022_decoder *iso)
@@ -3195,8 +4241,10 @@ reset_iso2022 (Lisp_Object coding_system, struct iso2022_decoder *iso)
   iso->invalid_switch_dir = 0;
   iso->output_direction_sequence = 0;
   iso->output_literally = 0;
+#ifdef ENABLE_COMPOSITE_CHARS
   if (iso->composite_chars)
     Dynarr_reset (iso->composite_chars);
+#endif
 }
 
 static int
@@ -3324,6 +4372,7 @@ parse_iso2022_esc (Lisp_Object codesys, struct iso2022_decoder *iso,
 	  reg = 3; half = 1;
 	  goto locking_shift;
 
+#ifdef ENABLE_COMPOSITE_CHARS
 	  /**** composite ****/
 
 	case '0':
@@ -3337,6 +4386,7 @@ parse_iso2022_esc (Lisp_Object codesys, struct iso2022_decoder *iso,
 	  *flags = (*flags & CODING_STATE_ISO2022_LOCK) &
 	    ~CODING_STATE_COMPOSITE;
 	  return 1;
+#endif /* ENABLE_COMPOSITE_CHARS */
 
 	  /**** directionality ****/
 
@@ -3593,11 +4643,15 @@ static int
 detect_coding_iso2022 (struct detection_state *st, CONST unsigned char *src,
 		       unsigned int n)
 {
-  int c;
   int mask;
 
   /* #### There are serious deficiencies in the recognition mechanism
-     here.  This needs to be much smarter if it's going to cut it. */
+     here.  This needs to be much smarter if it's going to cut it.
+     The sequence "\xff\x0f" is currently detected as LOCK_SHIFT while
+     it should be detected as Latin-1.
+     All the ISO2022 stuff in this file should be synced up with the
+     code from FSF Emacs-20.4, in which Mule should be more or less stable.
+     Perhaps we should wait till R2L works in FSF Emacs? */
 
   if (!st->iso2022.initted)
     {
@@ -3617,7 +4671,7 @@ detect_coding_iso2022 (struct detection_state *st, CONST unsigned char *src,
 
   while (n--)
     {
-      c = *src++;
+      int c = *src++;
       if (c >= 0xA0)
 	{
 	  mask &= ~CODING_CATEGORY_ISO_7_MASK;
@@ -3716,7 +4770,7 @@ postprocess_iso2022_mask (int mask)
    need to handle the CSI differently. */
 
 static void
-restore_left_to_right_direction (struct Lisp_Coding_System *codesys,
+restore_left_to_right_direction (Lisp_Coding_System *codesys,
 				 unsigned_char_dynarr *dst,
 				 unsigned int *flags,
 				 int internal_p)
@@ -3747,7 +4801,7 @@ restore_left_to_right_direction (struct Lisp_Coding_System *codesys,
    need to handle the CSI differently. */
 
 static void
-ensure_correct_direction (int direction, struct Lisp_Coding_System *codesys,
+ensure_correct_direction (int direction, Lisp_Coding_System *codesys,
 			  unsigned_char_dynarr *dst, unsigned int *flags,
 			  int internal_p)
 {
@@ -3780,23 +4834,25 @@ static void
 decode_coding_iso2022 (Lstream *decoding, CONST unsigned char *src,
 		       unsigned_char_dynarr *dst, unsigned int n)
 {
-  unsigned char c;
-  unsigned int flags, ch;
-  enum eol_type eol_type;
   struct decoding_stream *str = DECODING_STREAM_DATA (decoding);
-  Lisp_Object coding_system;
+  unsigned int flags  = str->flags;
+  unsigned int ch     = str->ch;
+  eol_type_t eol_type = str->eol_type;
+#ifdef ENABLE_COMPOSITE_CHARS
   unsigned_char_dynarr *real_dst = dst;
+#endif
+  Lisp_Object coding_system;
 
-  CODING_STREAM_DECOMPOSE (str, flags, ch);
-  eol_type = str->eol_type;
   XSETCODING_SYSTEM (coding_system, str->codesys);
 
+#ifdef ENABLE_COMPOSITE_CHARS
   if (flags & CODING_STATE_COMPOSITE)
     dst = str->iso2022.composite_chars;
+#endif /* ENABLE_COMPOSITE_CHARS */
 
   while (n--)
     {
-      c = *src++;
+      unsigned char c = *src++;
       if (flags & CODING_STATE_ESCAPE)
 	{	/* Within ESC sequence */
 	  int retval = parse_iso2022_esc (coding_system, &str->iso2022,
@@ -3806,6 +4862,7 @@ decode_coding_iso2022 (Lstream *decoding, CONST unsigned char *src,
 	    {
 	      switch (str->iso2022.esc)
 		{
+#ifdef ENABLE_COMPOSITE_CHARS
 		case ISO_ESC_START_COMPOSITE:
 		  if (str->iso2022.composite_chars)
 		    Dynarr_reset (str->iso2022.composite_chars);
@@ -3824,6 +4881,7 @@ decode_coding_iso2022 (Lstream *decoding, CONST unsigned char *src,
 		    Dynarr_add_many (dst, comstr, len);
 		    break;
 		  }
+#endif /* ENABLE_COMPOSITE_CHARS */
 
 		case ISO_ESC_LITERAL:
 		  DECODE_ADD_BINARY_CHAR (c, dst);
@@ -3898,7 +4956,9 @@ decode_coding_iso2022 (Lstream *decoding, CONST unsigned char *src,
       else
 	{			/* Graphic characters */
 	  Lisp_Object charset;
-	  int lb;
+#ifndef UTF2000
+	  Charset_ID lb;
+#endif
 	  int reg;
 
 	  DECODE_HANDLE_EOL_TYPE (eol_type, c, flags, dst);
@@ -3911,7 +4971,8 @@ decode_coding_iso2022 (Lstream *decoding, CONST unsigned char *src,
 	  charset = str->iso2022.charset[reg];
 
 	  /* Error checking: */
-	  if (NILP (charset) || str->iso2022.invalid_designated[reg]
+	  if (! CHARSETP (charset)
+	      || str->iso2022.invalid_designated[reg]
 	      || (((c & 0x7F) == ' ' || (c & 0x7F) == ISO_CODE_DEL)
 		  && XCHARSET_CHARS (charset) == 94))
 	    /* Mrmph.  We are trying to invoke a register that has no
@@ -3940,6 +5001,22 @@ decode_coding_iso2022 (Lstream *decoding, CONST unsigned char *src,
 		    charset = new_charset;
 		}
 
+#ifdef UTF2000
+	      if (XCHARSET_DIMENSION (charset) == 1)
+		{
+		  DECODE_OUTPUT_PARTIAL_CHAR (ch);
+		  DECODE_ADD_UCS_CHAR
+		    (MAKE_CHAR (charset, c & 0x7F, 0), dst);
+		}
+	      else if (ch)
+		{
+		  DECODE_ADD_UCS_CHAR
+		    (MAKE_CHAR (charset, ch & 0x7F, c & 0x7F), dst);
+		  ch = 0;
+		}
+	      else
+		ch = c;
+#else
 	      lb = XCHARSET_LEADING_BYTE (charset);
 	      switch (XCHARSET_REP_BYTES (charset))
 		{
@@ -3988,6 +5065,7 @@ decode_coding_iso2022 (Lstream *decoding, CONST unsigned char *src,
 		  else
 		    ch = c;
 		}
+#endif
 	    }
 
 	  if (!ch)
@@ -4000,7 +5078,8 @@ decode_coding_iso2022 (Lstream *decoding, CONST unsigned char *src,
   if (flags & CODING_STATE_END)
     DECODE_OUTPUT_PARTIAL_CHAR (ch);
 
-  CODING_STREAM_COMPOSE (str, flags, ch);
+  str->flags = flags;
+  str->ch    = ch;
 }
 
 
@@ -4012,7 +5091,8 @@ static void
 iso2022_designate (Lisp_Object charset, unsigned char reg,
 		   struct encoding_stream *str, unsigned_char_dynarr *dst)
 {
-  CONST char *inter94 = "()*+", *inter96= ",-./";
+  static CONST char inter94[] = "()*+";
+  static CONST char inter96[] = ",-./";
   unsigned int type;
   unsigned char final;
   Lisp_Object old_charset = str->iso2022.charset[reg];
@@ -4100,28 +5180,244 @@ encode_coding_iso2022 (Lstream *encoding, CONST unsigned char *src,
 		       unsigned_char_dynarr *dst, unsigned int n)
 {
   unsigned char charmask, c;
-  unsigned int flags, ch;
-  enum eol_type eol_type;
   unsigned char char_boundary;
   struct encoding_stream *str = ENCODING_STREAM_DATA (encoding);
-  struct Lisp_Coding_System *codesys = str->codesys;
+  unsigned int flags          = str->flags;
+  Emchar ch                   = str->ch;
+  Lisp_Coding_System *codesys = str->codesys;
+  eol_type_t eol_type         = CODING_SYSTEM_EOL_TYPE (str->codesys);
   int i;
   Lisp_Object charset;
   int half;
+#ifdef UTF2000
+  unsigned int byte1, byte2;
+#endif
 
+#ifdef ENABLE_COMPOSITE_CHARS
   /* flags for handling composite chars.  We do a little switcharoo
      on the source while we're outputting the composite char. */
   unsigned int saved_n = 0;
   CONST unsigned char *saved_src = NULL;
   int in_composite = 0;
+#endif /* ENABLE_COMPOSITE_CHARS */
 
-  CODING_STREAM_DECOMPOSE (str, flags, ch);
-  eol_type = CODING_SYSTEM_EOL_TYPE (str->codesys);
   char_boundary = str->iso2022.current_char_boundary;
   charset = str->iso2022.current_charset;
   half = str->iso2022.current_half;
 
+#ifdef ENABLE_COMPOSITE_CHARS
  back_to_square_n:
+#endif
+#ifdef UTF2000
+  while (n--)
+    {
+      c = *src++;
+
+      switch (char_boundary)
+	{
+	case 0:
+	  if ( c >= 0xfc )
+	    {
+	      ch = c & 0x01;
+	      char_boundary = 5;
+	    }
+	  else if ( c >= 0xf8 )
+	    {
+	      ch = c & 0x03;
+	      char_boundary = 4;
+	    }
+	  else if ( c >= 0xf0 )
+	    {
+	      ch = c & 0x07;
+	      char_boundary = 3;
+	    }
+	  else if ( c >= 0xe0 )
+	    {
+	      ch = c & 0x0f;
+	      char_boundary = 2;
+	    }
+	  else if ( c >= 0xc0 )
+	    {
+	      ch = c & 0x1f;
+	      char_boundary = 1;
+	    }
+	  else
+	    {
+	      ch = 0;
+
+	      restore_left_to_right_direction (codesys, dst, &flags, 0);
+	      
+	      /* Make sure G0 contains ASCII */
+	      if ((c > ' ' && c < ISO_CODE_DEL) ||
+		  !CODING_SYSTEM_ISO2022_NO_ASCII_CNTL (codesys))
+		{
+		  ensure_normal_shift (str, dst);
+		  iso2022_designate (Vcharset_ascii, 0, str, dst);
+		}
+	      
+	      /* If necessary, restore everything to the default state
+		 at end-of-line */
+	      if (c == '\n' &&
+		  !(CODING_SYSTEM_ISO2022_NO_ASCII_EOL (codesys)))
+		{
+		  restore_left_to_right_direction (codesys, dst, &flags, 0);
+
+		  ensure_normal_shift (str, dst);
+
+		  for (i = 0; i < 4; i++)
+		    {
+		      Lisp_Object initial_charset =
+			CODING_SYSTEM_ISO2022_INITIAL_CHARSET (codesys, i);
+		      iso2022_designate (initial_charset, i, str, dst);
+		    }
+		}
+	      if (c == '\n')
+		{
+		  if (eol_type != EOL_LF && eol_type != EOL_AUTODETECT)
+		    Dynarr_add (dst, '\r');
+		  if (eol_type != EOL_CR)
+		    Dynarr_add (dst, c);
+		}
+	      else
+		{
+		  if (CODING_SYSTEM_ISO2022_ESCAPE_QUOTED (codesys)
+		      && fit_to_be_escape_quoted (c))
+		    Dynarr_add (dst, ISO_CODE_ESC);
+		  Dynarr_add (dst, c);
+		}
+	      char_boundary = 0;
+	    }
+	  break;
+	case 1:
+	  ch = ( ch << 6 ) | ( c & 0x3f );
+	  
+	  char_boundary = 0;
+	  if ( (0x80 <= ch) && (ch <= 0x9f) )
+	    {
+	      charmask = (half == 0 ? 0x00 : 0x80);
+	  
+	      if (CODING_SYSTEM_ISO2022_ESCAPE_QUOTED (codesys)
+		  && fit_to_be_escape_quoted (ch))
+		Dynarr_add (dst, ISO_CODE_ESC);
+	      /* you asked for it ... */
+	      Dynarr_add (dst, ch);
+	    }
+	  else
+	    {
+	      int reg;
+
+	      BREAKUP_CHAR (ch, charset, byte1, byte2);
+	      ensure_correct_direction (XCHARSET_DIRECTION (charset),
+					codesys, dst, &flags, 0);
+
+	      /* Now determine which register to use. */
+	      reg = -1;
+	      for (i = 0; i < 4; i++)
+		{
+		  if (EQ (charset, str->iso2022.charset[i]) ||
+		      EQ (charset,
+			  CODING_SYSTEM_ISO2022_INITIAL_CHARSET (codesys, i)))
+		    {
+		      reg = i;
+		      break;
+		    }
+		}
+	      
+	      if (reg == -1)
+		{
+		  if (XCHARSET_GRAPHIC (charset) != 0)
+		    {
+		      if (!NILP (str->iso2022.charset[1]) &&
+			  (!CODING_SYSTEM_ISO2022_SEVEN (codesys) ||
+			   CODING_SYSTEM_ISO2022_LOCK_SHIFT (codesys)))
+			reg = 1;
+		      else if (!NILP (str->iso2022.charset[2]))
+			reg = 2;
+		      else if (!NILP (str->iso2022.charset[3]))
+			reg = 3;
+		      else
+			reg = 0;
+		    }
+		  else
+		    reg = 0;
+		}
+	      
+	      iso2022_designate (charset, reg, str, dst);
+	      
+	      /* Now invoke that register. */
+	      switch (reg)
+		{
+		case 0:
+		  ensure_normal_shift (str, dst);
+		  half = 0;
+		  break;
+		  
+		case 1:
+		  if (CODING_SYSTEM_ISO2022_SEVEN (codesys))
+		    {
+		      ensure_shift_out (str, dst);
+		      half = 0;
+		    }
+		  else
+		    half = 1;
+		  break;
+		  
+		case 2:
+		  if (CODING_SYSTEM_ISO2022_SEVEN (str->codesys))
+		    {
+		      Dynarr_add (dst, ISO_CODE_ESC);
+		      Dynarr_add (dst, 'N');
+		      half = 0;
+		    }
+		  else
+		    {
+		      Dynarr_add (dst, ISO_CODE_SS2);
+		      half = 1;
+		    }
+		  break;
+		  
+		case 3:
+		  if (CODING_SYSTEM_ISO2022_SEVEN (str->codesys))
+		    {
+		      Dynarr_add (dst, ISO_CODE_ESC);
+		      Dynarr_add (dst, 'O');
+		      half = 0;
+		    }
+		  else
+		    {
+		      Dynarr_add (dst, ISO_CODE_SS3);
+		      half = 1;
+		    }
+		  break;
+		  
+		default:
+		  abort ();
+		}
+	      
+	      charmask = (half == 0 ? 0x00 : 0x80);
+	      
+	      switch (XCHARSET_DIMENSION (charset))
+		{
+		case 1:
+		  Dynarr_add (dst, byte1 | charmask);
+		  break;
+		case 2:
+		  Dynarr_add (dst, byte1 | charmask);
+		  Dynarr_add (dst, byte2 | charmask);
+		  break;
+		default:
+		  abort ();
+		}
+	    }
+	  ch =0;
+	  break;
+	default:
+	  ch = ( ch << 6 ) | ( c & 0x3f );
+	  char_boundary--;
+	}
+    }
+#else /* not UTF2000 */
+
   while (n--)
     {
       c = *src++;
@@ -4180,7 +5476,10 @@ encode_coding_iso2022 (Lstream *encoding, CONST unsigned char *src,
 	  if (LEADING_BYTE_PREFIX_P(c))
 	    ch = c;
 	  else if (!EQ (charset, Vcharset_control_1)
-		   && !EQ (charset, Vcharset_composite))
+#ifdef ENABLE_COMPOSITE_CHARS
+		   && !EQ (charset, Vcharset_composite)
+#endif
+		   )
 	    {
 	      int reg;
 
@@ -4300,6 +5599,7 @@ encode_coding_iso2022 (Lstream *encoding, CONST unsigned char *src,
 		    }
 		  else if (ch)
 		    {
+#ifdef ENABLE_COMPOSITE_CHARS
 		      if (EQ (charset, Vcharset_composite))
 			{
 			  if (in_composite)
@@ -4323,6 +5623,7 @@ encode_coding_iso2022 (Lstream *encoding, CONST unsigned char *src,
 			    }
 			}
 		      else
+#endif /* ENABLE_COMPOSITE_CHARS */
 			{
 			  Dynarr_add (dst, ch & charmask);
 			  Dynarr_add (dst, c & charmask);
@@ -4354,7 +5655,9 @@ encode_coding_iso2022 (Lstream *encoding, CONST unsigned char *src,
 	    }
 	}
     }
+#endif /* not UTF2000 */
 
+#ifdef ENABLE_COMPOSITE_CHARS
   if (in_composite)
     {
       n = saved_n;
@@ -4364,8 +5667,13 @@ encode_coding_iso2022 (Lstream *encoding, CONST unsigned char *src,
       Dynarr_add (dst, '1'); /* end composing */
       goto back_to_square_n; /* Wheeeeeeeee ..... */
     }
+#endif /* ENABLE_COMPOSITE_CHARS */
 
+#ifdef UTF2000
+  if ( (char_boundary == 0) && flags & CODING_STATE_END)
+#else
   if (char_boundary && flags & CODING_STATE_END)
+#endif
     {
       restore_left_to_right_direction (codesys, dst, &flags, 0);
       ensure_normal_shift (str, dst);
@@ -4377,7 +5685,8 @@ encode_coding_iso2022 (Lstream *encoding, CONST unsigned char *src,
 	}
     }
 
-  CODING_STREAM_COMPOSE (str, flags, ch);
+  str->flags = flags;
+  str->ch    = ch;
   str->iso2022.current_char_boundary = char_boundary;
   str->iso2022.current_charset = charset;
   str->iso2022.current_half = half;
@@ -4398,12 +5707,10 @@ decode_coding_no_conversion (Lstream *decoding, CONST unsigned char *src,
 			     unsigned_char_dynarr *dst, unsigned int n)
 {
   unsigned char c;
-  unsigned int flags, ch;
-  enum eol_type eol_type;
   struct decoding_stream *str = DECODING_STREAM_DATA (decoding);
-
-  CODING_STREAM_DECOMPOSE (str, flags, ch);
-  eol_type = str->eol_type;
+  unsigned int flags  = str->flags;
+  unsigned int ch     = str->ch;
+  eol_type_t eol_type = str->eol_type;
 
   while (n--)
     {
@@ -4416,7 +5723,8 @@ decode_coding_no_conversion (Lstream *decoding, CONST unsigned char *src,
 
   DECODE_HANDLE_END_OF_CONVERSION (flags, ch, dst);
 
-  CODING_STREAM_COMPOSE (str, flags, ch);
+  str->flags = flags;
+  str->ch    = ch;
 }
 
 static void
@@ -4425,15 +5733,71 @@ encode_coding_no_conversion (Lstream *encoding, CONST unsigned char *src,
 {
   unsigned char c;
   struct encoding_stream *str = ENCODING_STREAM_DATA (encoding);
-  unsigned int flags, ch;
-  enum eol_type eol_type;
-
-  CODING_STREAM_DECOMPOSE (str, flags, ch);
-  eol_type = CODING_SYSTEM_EOL_TYPE (str->codesys);
+  unsigned int flags  = str->flags;
+  unsigned int ch     = str->ch;
+  eol_type_t eol_type = CODING_SYSTEM_EOL_TYPE (str->codesys);
+#ifdef UTF2000
+  unsigned char char_boundary = str->iso2022.current_char_boundary;
+#endif
 
   while (n--)
     {
-      c = *src++;
+      c = *src++;	  
+#ifdef UTF2000
+      switch (char_boundary)
+	{
+	case 0:
+	  if ( c >= 0xfc )
+	    {
+	      ch = c & 0x01;
+	      char_boundary = 5;
+	    }
+	  else if ( c >= 0xf8 )
+	    {
+	      ch = c & 0x03;
+	      char_boundary = 4;
+	    }
+	  else if ( c >= 0xf0 )
+	    {
+	      ch = c & 0x07;
+	      char_boundary = 3;
+	    }
+	  else if ( c >= 0xe0 )
+	    {
+	      ch = c & 0x0f;
+	      char_boundary = 2;
+	    }
+	  else if ( c >= 0xc0 )
+	    {
+	      ch = c & 0x1f;
+	      char_boundary = 1;
+	    }
+	  else
+	    {
+	      ch = 0;
+
+	      if (c == '\n')
+		{
+		  if (eol_type != EOL_LF && eol_type != EOL_AUTODETECT)
+		    Dynarr_add (dst, '\r');
+		  if (eol_type != EOL_CR)
+		    Dynarr_add (dst, c);
+		}
+	      else
+		Dynarr_add (dst, c);
+	      char_boundary = 0;
+	    }
+	  break;
+	case 1:
+	  ch = ( ch << 6 ) | ( c & 0x3f );
+	  Dynarr_add (dst, ch & 0xff);
+	  char_boundary = 0;
+	  break;
+	default:
+	  ch = ( ch << 6 ) | ( c & 0x3f );
+	  char_boundary--;
+	}
+#else /* not UTF2000 */
       if (c == '\n')
 	{
 	  if (eol_type != EOL_LF && eol_type != EOL_AUTODETECT)
@@ -4469,9 +5833,14 @@ encode_coding_no_conversion (Lstream *encoding, CONST unsigned char *src,
 	     untranslatable character, so ignore it */
 	  ch = 0;
 	}
+#endif /* not UTF2000 */
     }
 
-  CODING_STREAM_COMPOSE (str, flags, ch);
+  str->flags = flags;
+  str->ch    = ch;
+#ifdef UTF2000
+  str->iso2022.current_char_boundary = char_boundary;
+#endif
 }
 
 
@@ -4484,24 +5853,27 @@ static Bufbyte_dynarr *conversion_in_dynarr;
 
 /* Determine coding system from coding format */
 
-#define FILE_NAME_CODING_SYSTEM 			\
- ((NILP (Vfile_name_coding_system) ||			\
-   (EQ ((Vfile_name_coding_system), Qbinary))) ?	\
-  Qnil : Fget_coding_system (Vfile_name_coding_system))
-
 /* #### not correct for all values of `fmt'! */
+static Lisp_Object
+external_data_format_to_coding_system (enum external_data_format fmt)
+{
+  switch (fmt)	/* choose the coding system for external format FMT */
+    {
+    case FORMAT_FILENAME:
+    case FORMAT_TERMINAL:	/* handled the same as FORMAT_FILENAME */
+      if (EQ (Vfile_name_coding_system, Qnil) ||
+	  EQ (Vfile_name_coding_system, Qbinary))
+	return Qnil;	/* Vfile_name_coding_system is nil or binary */
+      else
+	return Fget_coding_system (Vfile_name_coding_system);
 #ifdef MULE
-#define FMT_CODING_SYSTEM(fmt)					\
- (((fmt) == FORMAT_FILENAME) ? FILE_NAME_CODING_SYSTEM     :	\
-  ((fmt) == FORMAT_CTEXT   ) ? Fget_coding_system (Qctext) :	\
-  ((fmt) == FORMAT_TERMINAL) ? FILE_NAME_CODING_SYSTEM     :	\
-  Qnil)
-#else
-#define FMT_CODING_SYSTEM(fmt)					\
- (((fmt) == FORMAT_FILENAME) ? FILE_NAME_CODING_SYSTEM     :	\
-  ((fmt) == FORMAT_TERMINAL) ? FILE_NAME_CODING_SYSTEM     :	\
-  Qnil)
+    case FORMAT_CTEXT:
+      return Fget_coding_system (Qctext);	/* only when built with MULE */
 #endif
+    default:
+      return Qnil;	/* all remaining formats */
+    }
+}
 
 Extbyte *
 convert_to_external_format (CONST Bufbyte *ptr,
@@ -4509,7 +5881,7 @@ convert_to_external_format (CONST Bufbyte *ptr,
 			    Extcount *len_out,
 			    enum external_data_format fmt)
 {
-  Lisp_Object coding_system = FMT_CODING_SYSTEM (fmt);
+  Lisp_Object coding_system = external_data_format_to_coding_system (fmt);
 
   if (!conversion_out_dynarr)
     conversion_out_dynarr = Dynarr_new (Extbyte);
@@ -4522,12 +5894,17 @@ convert_to_external_format (CONST Bufbyte *ptr,
 
       for (; ptr < end;)
         {
+#ifdef UTF2000
+          Bufbyte c =
+	    (*ptr < 0xc0) ? *ptr :
+	    ((*ptr & 0x1f) << 6) | (*(ptr+1) & 0x3f);
+#else
           Bufbyte c =
             (BYTE_ASCII_P (*ptr))		   ? *ptr :
             (*ptr == LEADING_BYTE_CONTROL_1)	   ? (*(ptr+1) - 0x20) :
             (*ptr == LEADING_BYTE_LATIN_ISO8859_1) ? (*(ptr+1)) :
             '~';
-
+#endif
           Dynarr_add (conversion_out_dynarr, (Extbyte) c);
           INC_CHARPTR (ptr);
         }
@@ -4577,7 +5954,7 @@ convert_from_external_format (CONST Extbyte *ptr,
 			      Bytecount *len_out,
 			      enum external_data_format fmt)
 {
-  Lisp_Object coding_system = FMT_CODING_SYSTEM (fmt);
+  Lisp_Object coding_system = external_data_format_to_coding_system (fmt);
 
   if (!conversion_in_dynarr)
     conversion_in_dynarr = Dynarr_new (Bufbyte);
@@ -4634,7 +6011,7 @@ convert_from_external_format (CONST Extbyte *ptr,
 /************************************************************************/
 
 void
-syms_of_mule_coding (void)
+syms_of_file_coding (void)
 {
   defsymbol (&Qbuffer_file_coding_system, "buffer-file-coding-system");
   deferror (&Qcoding_system_error, "coding-system-error",
@@ -4647,6 +6024,7 @@ syms_of_mule_coding (void)
   DEFSUBR (Fcoding_system_name);
   DEFSUBR (Fmake_coding_system);
   DEFSUBR (Fcopy_coding_system);
+  DEFSUBR (Fdefine_coding_system_alias);
   DEFSUBR (Fsubsidiary_coding_system);
 
   DEFSUBR (Fcoding_system_type);
@@ -4670,12 +6048,19 @@ syms_of_mule_coding (void)
   DEFSUBR (Fencode_shift_jis_char);
   DEFSUBR (Fdecode_big5_char);
   DEFSUBR (Fencode_big5_char);
+  DEFSUBR (Fset_ucs_char);
+  DEFSUBR (Fucs_char);
+  DEFSUBR (Fset_char_ucs);
+  DEFSUBR (Fchar_ucs);
 #endif /* MULE */
   defsymbol (&Qcoding_system_p, "coding-system-p");
   defsymbol (&Qno_conversion, "no-conversion");
+  defsymbol (&Qraw_text, "raw-text");
 #ifdef MULE
   defsymbol (&Qbig5, "big5");
   defsymbol (&Qshift_jis, "shift-jis");
+  defsymbol (&Qucs4, "ucs-4");
+  defsymbol (&Qutf8, "utf-8");
   defsymbol (&Qccl, "ccl");
   defsymbol (&Qiso2022, "iso2022");
 #endif /* MULE */
@@ -4719,6 +6104,10 @@ syms_of_mule_coding (void)
 	     "shift-jis");
   defsymbol (&coding_category_symbol[CODING_CATEGORY_BIG5],
 	     "big5");
+  defsymbol (&coding_category_symbol[CODING_CATEGORY_UCS4],
+	     "ucs-4");
+  defsymbol (&coding_category_symbol[CODING_CATEGORY_UTF8],
+	     "utf-8");
   defsymbol (&coding_category_symbol[CODING_CATEGORY_ISO_7],
 	     "iso-7");
   defsymbol (&coding_category_symbol[CODING_CATEGORY_ISO_8_DESIGNATE],
@@ -4735,7 +6124,7 @@ syms_of_mule_coding (void)
 }
 
 void
-lstream_type_create_mule_coding (void)
+lstream_type_create_file_coding (void)
 {
   LSTREAM_HAS_METHOD (decoding, reader);
   LSTREAM_HAS_METHOD (decoding, writer);
@@ -4755,7 +6144,7 @@ lstream_type_create_mule_coding (void)
 }
 
 void
-vars_of_mule_coding (void)
+vars_of_file_coding (void)
 {
   int i;
 
@@ -4817,11 +6206,11 @@ Setting this to nil does not do anything.
 }
 
 void
-complex_vars_of_mule_coding (void)
+complex_vars_of_file_coding (void)
 {
-  staticpro (&Vcoding_system_hashtable);
-  Vcoding_system_hashtable = make_lisp_hashtable (50, HASHTABLE_NONWEAK,
-						  HASHTABLE_EQ);
+  staticpro (&Vcoding_system_hash_table);
+  Vcoding_system_hash_table =
+    make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
 
   the_codesys_prop_dynarr = Dynarr_new (codesys_prop);
 
@@ -4863,13 +6252,43 @@ complex_vars_of_mule_coding (void)
   DEFINE_CODESYS_PROP (CODESYS_PROP_CCL,     Qdecode);
 #endif /* MULE */
   /* Need to create this here or we're really screwed. */
-  Fmake_coding_system (Qno_conversion, Qno_conversion, build_string ("No conversion"),
-		       list2 (Qmnemonic, build_string ("Noconv")));
+  Fmake_coding_system
+    (Qraw_text, Qno_conversion,
+     build_string ("Raw text, which means it converts only line-break-codes."),
+     list2 (Qmnemonic, build_string ("Raw")));
+
+  Fmake_coding_system
+    (Qbinary, Qno_conversion,
+     build_string ("Binary, which means it does not convert anything."),
+     list4 (Qeol_type, Qlf,
+	    Qmnemonic, build_string ("Binary")));
+
+#ifdef UTF2000
+  Fmake_coding_system
+    (Qutf8, Qutf8,
+     build_string ("Coding-system of ISO/IEC 10646 UTF-8."),
+     list2 (Qmnemonic, build_string ("UTF8")));
+#endif
 
-  Fcopy_coding_system (Fcoding_system_property (Qno_conversion, Qeol_lf),
-		       Qbinary);
+  Fdefine_coding_system_alias (Qno_conversion, Qraw_text);
 
   /* Need this for bootstrapping */
   coding_category_system[CODING_CATEGORY_NO_CONVERSION] =
-    Fget_coding_system (Qno_conversion);
+    Fget_coding_system (Qraw_text);
+
+#ifdef UTF2000
+  coding_category_system[CODING_CATEGORY_UTF8]
+   = Fget_coding_system (Qutf8);
+#endif
+
+#ifdef MULE
+  {
+    unsigned int i;
+
+    for (i = 0; i < 65536; i++)
+      ucs_to_mule_table[i] = Qnil;
+  }
+  staticpro (&mule_to_ucs_table);
+  mule_to_ucs_table = Fmake_char_table(Qgeneric);
+#endif /* MULE */
 }