From 03a3cbce5dcbcdf7d2f23f179e3b85be29a50a56 Mon Sep 17 00:00:00 2001
From: tomo <tomo>
Date: Mon, 24 Jul 2000 03:15:03 +0000
Subject: [PATCH] (struct decoding_stream): Rename member `CH' to `CPOS'.
 (reset_decoding_stream): Use `str->cpos' instead of
 `str->ch'. (decode_coding_sjis): Likewise.
 (decode_coding_big5): Likewise. (decode_coding_ucs4):
 Likewise. (decode_coding_utf8): Likewise.
 (parse_iso2022_esc): Don't use `CHARSET_TYPE_*'; modify for
 `CHARSET_BY_ATTRIBUTES'. (decode_coding_iso2022): Use
 `str->cpos' instead of `str->ch'; use `str->counter';
 decode 3- and 4-byte sets. (char_encode_iso2022): Don't use
 `BREAKUP_CHAR'; encode 3- and 4-byte sets.
 (decode_coding_no_conversion): Use `str->cpos' instead of
 `str->ch'.

---
 src/text-coding.c |  212 +++++++++++++++++++++++++++--------------------------
 1 file changed, 109 insertions(+), 103 deletions(-)
diff --git a/src/text-coding.c b/src/text-coding.c
index 7dac5de..94284a1 100644
--- a/src/text-coding.c
+++ b/src/text-coding.c
@@ -2164,10 +2164,8 @@ struct decoding_stream
      Some of these flags are dependent on the coding system. */
   unsigned int flags;
 
-  /* CH holds a partially built-up character.  Since we only deal
-     with one- and two-byte characters at the moment, we only use
-     this to store the first byte of a two-byte character. */
-  unsigned int ch;
+  /* CPOS holds the partially built-up code point of a character. */
+  unsigned int cpos;
 
   /* EOL_TYPE specifies the type of end-of-line conversion that
      currently applies.  We need to keep this separate from the
@@ -2409,7 +2407,7 @@ reset_decoding_stream (struct decoding_stream *str)
   str->combined_char_count = 0;
   str->combining_table = Qnil;
 #endif
-  str->flags = str->ch = 0;
+  str->flags = str->cpos = 0;
 }
 
 static int
@@ -3225,21 +3223,21 @@ decode_coding_sjis (Lstream *decoding, const unsigned char *src,
   unsigned char c;
   struct decoding_stream *str = DECODING_STREAM_DATA (decoding);
   unsigned int flags  = str->flags;
-  unsigned int ch     = str->ch;
+  unsigned int cpos   = str->cpos;
   eol_type_t eol_type = str->eol_type;
 
   while (n--)
     {
       c = *src++;
 
-      if (ch)
+      if (cpos)
 	{
 	  /* Previous character was first byte of Shift-JIS Kanji char. */
 	  if (BYTE_SJIS_TWO_BYTE_2_P (c))
 	    {
 	      unsigned char e1, e2;
 
-	      DECODE_SJIS (ch, c, e1, e2);
+	      DECODE_SJIS (cpos, c, e1, e2);
 #ifdef UTF2000
 	      DECODE_ADD_UCS_CHAR(MAKE_CHAR(Vcharset_japanese_jisx0208,
 					    e1 & 0x7F,
@@ -3252,16 +3250,16 @@ decode_coding_sjis (Lstream *decoding, const unsigned char *src,
 	    }
 	  else
 	    {
-	      DECODE_ADD_BINARY_CHAR (ch, dst);
+	      DECODE_ADD_BINARY_CHAR (cpos, dst);
 	      DECODE_ADD_BINARY_CHAR (c, dst);
 	    }
-	  ch = 0;
+	  cpos = 0;
 	}
       else
 	{
 	  DECODE_HANDLE_EOL_TYPE (eol_type, c, flags, dst);
 	  if (BYTE_SJIS_TWO_BYTE_1_P (c))
-	    ch = c;
+	    cpos = c;
 	  else if (BYTE_SJIS_KATAKANA_P (c))
 	    {
 #ifdef UTF2000
@@ -3283,10 +3281,10 @@ decode_coding_sjis (Lstream *decoding, const unsigned char *src,
     label_continue_loop:;
     }
 
-  DECODE_HANDLE_END_OF_CONVERSION (flags, ch, dst);
+  DECODE_HANDLE_END_OF_CONVERSION (flags, cpos, dst);
 
   str->flags = flags;
-  str->ch    = ch;
+  str->cpos  = cpos;
 }
 
 /* Convert internal character representation to Shift_JIS. */
@@ -3528,45 +3526,45 @@ decode_coding_big5 (Lstream *decoding, const unsigned char *src,
   unsigned char c;
   struct decoding_stream *str = DECODING_STREAM_DATA (decoding);
   unsigned int flags  = str->flags;
-  unsigned int ch     = str->ch;
+  unsigned int cpos   = str->cpos;
   eol_type_t eol_type = str->eol_type;
 
   while (n--)
     {
       c = *src++;
-      if (ch)
+      if (cpos)
 	{
 	  /* Previous character was first byte of Big5 char. */
 	  if (BYTE_BIG5_TWO_BYTE_2_P (c))
 	    {
 	      unsigned char b1, b2, b3;
-	      DECODE_BIG5 (ch, c, b1, b2, b3);
+	      DECODE_BIG5 (cpos, c, b1, b2, b3);
 	      Dynarr_add (dst, b1);
 	      Dynarr_add (dst, b2);
 	      Dynarr_add (dst, b3);
 	    }
 	  else
 	    {
-	      DECODE_ADD_BINARY_CHAR (ch, dst);
+	      DECODE_ADD_BINARY_CHAR (cpos, dst);
 	      DECODE_ADD_BINARY_CHAR (c, dst);
 	    }
-	  ch = 0;
+	  cpos = 0;
 	}
       else
 	{
 	  DECODE_HANDLE_EOL_TYPE (eol_type, c, flags, dst);
 	  if (BYTE_BIG5_TWO_BYTE_1_P (c))
-	    ch = c;
+	    cpos = c;
 	  else
 	    DECODE_ADD_BINARY_CHAR (c, dst);
 	}
     label_continue_loop:;
     }
 
-  DECODE_HANDLE_END_OF_CONVERSION (flags, ch, dst);
+  DECODE_HANDLE_END_OF_CONVERSION (flags, cpos, dst);
 
   str->flags = flags;
-  str->ch    = ch;
+  str->cpos  = cpos;
 }
 
 /* Convert internally-formatted data to Big5. */
@@ -3719,7 +3717,7 @@ decode_coding_ucs4 (Lstream *decoding, const unsigned char *src,
 {
   struct decoding_stream *str = DECODING_STREAM_DATA (decoding);
   unsigned int flags = str->flags;
-  unsigned int ch    = str->ch;
+  unsigned int cpos  = str->cpos;
   unsigned char counter = str->counter;
 
   while (n--)
@@ -3728,25 +3726,25 @@ decode_coding_ucs4 (Lstream *decoding, const unsigned char *src,
       switch (counter)
 	{
 	case 0:
-	  ch = c;
+	  cpos = c;
 	  counter = 3;
 	  break;
 	case 1:
-	  DECODE_ADD_UCS_CHAR ((ch << 8) | c, dst);
-	  ch = 0;
+	  DECODE_ADD_UCS_CHAR ((cpos << 8) | c, dst);
+	  cpos = 0;
 	  counter = 0;
 	  break;
 	default:
-	  ch = ( ch << 8 ) | c;
+	  cpos = ( cpos << 8 ) | c;
 	  counter--;
 	}
     }
   if (counter & CODING_STATE_END)
-    DECODE_OUTPUT_PARTIAL_CHAR (ch);
+    DECODE_OUTPUT_PARTIAL_CHAR (cpos);
 
-  str->flags = flags;
-  str->ch    = ch;
-  str->counter = counter;
+  str->flags	= flags;
+  str->cpos	= cpos;
+  str->counter	= counter;
 }
 
 void
@@ -3810,10 +3808,10 @@ decode_coding_utf8 (Lstream *decoding, const unsigned char *src,
 		    unsigned_char_dynarr *dst, unsigned int n)
 {
   struct decoding_stream *str = DECODING_STREAM_DATA (decoding);
-  unsigned int flags  = str->flags;
-  unsigned int ch     = str->ch;
-  eol_type_t eol_type = str->eol_type;
-  unsigned char counter = str->counter;
+  unsigned int flags	= str->flags;
+  unsigned int cpos	= str->cpos;
+  eol_type_t eol_type	= str->eol_type;
+  unsigned char counter	= str->counter;
 
   while (n--)
     {
@@ -3823,27 +3821,27 @@ decode_coding_utf8 (Lstream *decoding, const unsigned char *src,
 	case 0:
 	  if ( c >= 0xfc )
 	    {
-	      ch = c & 0x01;
+	      cpos = c & 0x01;
 	      counter = 5;
 	    }
 	  else if ( c >= 0xf8 )
 	    {
-	      ch = c & 0x03;
+	      cpos = c & 0x03;
 	      counter = 4;
 	    }
 	  else if ( c >= 0xf0 )
 	    {
-	      ch = c & 0x07;
+	      cpos = c & 0x07;
 	      counter = 3;
 	    }
 	  else if ( c >= 0xe0 )
 	    {
-	      ch = c & 0x0f;
+	      cpos = c & 0x0f;
 	      counter = 2;
 	    }
 	  else if ( c >= 0xc0 )
 	    {
-	      ch = c & 0x1f;
+	      cpos = c & 0x1f;
 	      counter = 1;
 	    }
 	  else
@@ -3853,24 +3851,24 @@ decode_coding_utf8 (Lstream *decoding, const unsigned char *src,
 	    }
 	  break;
 	case 1:
-	  ch = ( ch << 6 ) | ( c & 0x3f );
-	  DECODE_ADD_UCS_CHAR (ch, dst);
-	  ch = 0;
+	  cpos = ( cpos << 6 ) | ( c & 0x3f );
+	  DECODE_ADD_UCS_CHAR (cpos, dst);
+	  cpos = 0;
 	  counter = 0;
 	  break;
 	default:
-	  ch = ( ch << 6 ) | ( c & 0x3f );
+	  cpos = ( cpos << 6 ) | ( c & 0x3f );
 	  counter--;
 	}
     label_continue_loop:;
     }
 
   if (flags & CODING_STATE_END)
-    DECODE_OUTPUT_PARTIAL_CHAR (ch);
+    DECODE_OUTPUT_PARTIAL_CHAR (cpos);
 
-  str->flags = flags;
-  str->ch    = ch;
-  str->counter = counter;
+  str->flags	= flags;
+  str->cpos	= cpos;
+  str->counter	= counter;
 }
 
 void
@@ -4383,7 +4381,8 @@ parse_iso2022_esc (Lisp_Object codesys, struct iso2022_decoder *iso,
 	}
       if (0x40 <= c && c <= 0x42)
 	{
-	  cs = CHARSET_BY_ATTRIBUTES (CHARSET_TYPE_94X94, c,
+	  /* 94^n-set */
+	  cs = CHARSET_BY_ATTRIBUTES (94, -1, c,
 				      *flags & CODING_STATE_R2L ?
 				      CHARSET_RIGHT_TO_LEFT :
 				      CHARSET_LEFT_TO_RIGHT);
@@ -4394,7 +4393,8 @@ parse_iso2022_esc (Lisp_Object codesys, struct iso2022_decoder *iso,
 
     default:
       {
-	int type =-1;
+	int chars = 0;
+	int single = 0;
 
 	if (c < '0' || c > '~')
 	  return 0; /* bad final byte */
@@ -4402,15 +4402,15 @@ parse_iso2022_esc (Lisp_Object codesys, struct iso2022_decoder *iso,
 	if (iso->esc >= ISO_ESC_2_8 &&
 	    iso->esc <= ISO_ESC_2_15)
 	  {
-	    type = ((iso->esc >= ISO_ESC_2_12) ?
-		    CHARSET_TYPE_96 : CHARSET_TYPE_94);
+	    chars = (iso->esc >= ISO_ESC_2_12) ? 96 : 94;
+	    single = 1; /* single-byte */
 	    reg = (iso->esc - ISO_ESC_2_8) & 3;
 	  }
 	else if (iso->esc >= ISO_ESC_2_4_8 &&
 		 iso->esc <= ISO_ESC_2_4_15)
 	  {
-	    type = ((iso->esc >= ISO_ESC_2_4_12) ?
-		    CHARSET_TYPE_96X96 : CHARSET_TYPE_94X94);
+	    chars = (iso->esc >= ISO_ESC_2_4_12) ? 96 : 94;
+	    single = -1; /* multi-byte */
 	    reg = (iso->esc - ISO_ESC_2_4_8) & 3;
 	  }
 	else
@@ -4419,7 +4419,7 @@ parse_iso2022_esc (Lisp_Object codesys, struct iso2022_decoder *iso,
 	    abort();
 	  }
 
-	cs = CHARSET_BY_ATTRIBUTES (type, c,
+	cs = CHARSET_BY_ATTRIBUTES (chars, single, c,
 				    *flags & CODING_STATE_R2L ?
 				    CHARSET_RIGHT_TO_LEFT :
 				    CHARSET_LEFT_TO_RIGHT);
@@ -4707,9 +4707,10 @@ decode_coding_iso2022 (Lstream *decoding, const unsigned char *src,
 		       unsigned_char_dynarr *dst, unsigned int n)
 {
   struct decoding_stream *str = DECODING_STREAM_DATA (decoding);
-  unsigned int flags  = str->flags;
-  unsigned int ch     = str->ch;
-  eol_type_t eol_type = str->eol_type;
+  unsigned int flags	= str->flags;
+  unsigned int cpos	= str->cpos;
+  unsigned char counter = str->counter;
+  eol_type_t eol_type	= str->eol_type;
 #ifdef ENABLE_COMPOSITE_CHARS
   unsigned_char_dynarr *real_dst = dst;
 #endif
@@ -4792,7 +4793,8 @@ decode_coding_iso2022 (Lstream *decoding, const unsigned char *src,
 		  DECODE_ADD_BINARY_CHAR (c, dst);
 		}
 	    }
-	  ch = 0;
+	  cpos = 0;
+	  counter = 0;
 	}
       else if (BYTE_C0_P (c) || BYTE_C1_P (c))
 	{ /* Control characters */
@@ -4801,11 +4803,16 @@ decode_coding_iso2022 (Lstream *decoding, const unsigned char *src,
 
 	  /* If we were in the middle of a character, dump out the
 	     partial character. */
-	  if (ch)
+	  if (counter)
 	    {
 	      COMPOSE_FLUSH_CHARS (str, dst);
-	      DECODE_ADD_BINARY_CHAR (ch, dst);
-	      ch = 0;
+	      while (counter > 0)
+		{
+		  counter--;
+		  DECODE_ADD_BINARY_CHAR
+		    ((unsigned char)(cpos >> (counter * 8)), dst);
+		}
+	      cpos = 0;
 	    }
 
 	  /* If we just saw a single-shift character, dump it out.
@@ -4906,7 +4913,13 @@ decode_coding_iso2022 (Lstream *decoding, const unsigned char *src,
 	       to preserve it for the output. */
 	    {
 	      COMPOSE_FLUSH_CHARS (str, dst);
-	      DECODE_OUTPUT_PARTIAL_CHAR (ch);
+	      while (counter > 0)
+		{
+		  counter--;
+		  DECODE_ADD_BINARY_CHAR
+		    ((unsigned char)(cpos >> (counter * 8)), dst);
+		}
+	      cpos = 0;
 	      DECODE_ADD_BINARY_CHAR (c, dst);
 	    }
 
@@ -4928,26 +4941,19 @@ decode_coding_iso2022 (Lstream *decoding, const unsigned char *src,
 		}
 
 #ifdef UTF2000
-	      if (XCHARSET_DIMENSION (charset) == 1)
+	      counter++;
+	      if (XCHARSET_DIMENSION (charset) == counter)
 		{
-		  if (ch)
-		    {
-		      COMPOSE_FLUSH_CHARS (str, dst);
-		      DECODE_ADD_BINARY_CHAR (ch, dst);
-		      ch = 0;
-		    }
 		  COMPOSE_ADD_CHAR (str,
-				    MAKE_CHAR (charset, c & 0x7F, 0), dst);
-		}
-	      else if (ch)
-		{
-		  COMPOSE_ADD_CHAR (str,
-				    MAKE_CHAR (charset, ch & 0x7F, c & 0x7F),
+				    DECODE_CHAR (charset,
+						 ((cpos & 0x7F7F7F) << 8)
+						 | (c & 0x7F)),
 				    dst);
-		  ch = 0;
+		  cpos = 0;
+		  counter = 0;
 		}
 	      else
-		ch = c;
+		cpos = (cpos << 8) | c;
 #else
 	      lb = XCHARSET_LEADING_BYTE (charset);
 	      switch (XCHARSET_REP_BYTES (charset))
@@ -5000,7 +5006,7 @@ decode_coding_iso2022 (Lstream *decoding, const unsigned char *src,
 #endif
 	    }
 
-	  if (!ch)
+	  if (!cpos)
 	    flags &= CODING_STATE_ISO2022_LOCK;
 	}
 
@@ -5010,10 +5016,11 @@ decode_coding_iso2022 (Lstream *decoding, const unsigned char *src,
   if (flags & CODING_STATE_END)
     {
       COMPOSE_FLUSH_CHARS (str, dst);
-      DECODE_OUTPUT_PARTIAL_CHAR (ch);
+      DECODE_OUTPUT_PARTIAL_CHAR (cpos);
     }
-  str->flags = flags;
-  str->ch    = ch;
+  str->flags   = flags;
+  str->cpos    = cpos;
+  str->counter = counter;
 }
 
 
@@ -5124,7 +5131,7 @@ char_encode_iso2022 (struct encoding_stream *str, Emchar ch,
   int i;
   Lisp_Object charset = str->iso2022.current_charset;
   int half = str->iso2022.current_half;
-  unsigned int byte1, byte2;
+  int code_point;
 
   if (ch <= 0x7F)
     {
@@ -5186,8 +5193,6 @@ char_encode_iso2022 (struct encoding_stream *str, Emchar ch,
       reg = -1;
       for (i = 0; i < 4; i++)
 	{
-	  int code_point;
-
 	  if ((CHARSETP (charset = str->iso2022.charset[i])
 	       && ((code_point = charset_code_point (charset, ch)) >= 0))
 	      ||
@@ -5196,16 +5201,6 @@ char_encode_iso2022 (struct encoding_stream *str, Emchar ch,
 		= CODING_SYSTEM_ISO2022_INITIAL_CHARSET (codesys, i))
 	       && ((code_point = charset_code_point (charset, ch)) >= 0)))
 	    {
-	      if (XCHARSET_DIMENSION (charset) == 1)
-		{
-		  byte1 = code_point;
-		  byte2 = 0;
-		}
-	      else /* if (XCHARSET_DIMENSION (charset) == 2) */
-		{
-		  byte1 = code_point >> 8;
-		  byte2 = code_point & 255;
-		}
 	      reg = i;
 	      break;
 	    }
@@ -5217,18 +5212,18 @@ char_encode_iso2022 (struct encoding_stream *str, Emchar ch,
 
 	  while (!EQ (Vdefault_coded_charset_priority_list, Qnil))
 	    {
-	      BREAKUP_CHAR (ch, charset, byte1, byte2);
+	      code_point = ENCODE_CHAR (ch, charset);
 	      if (XCHARSET_FINAL (charset))
 		goto found;
 	      Vdefault_coded_charset_priority_list
 		= Fcdr (Fmemq (XCHARSET_NAME (charset),
 			       Vdefault_coded_charset_priority_list));
 	    }
-	  BREAKUP_CHAR (ch, charset, byte1, byte2);
+	  code_point = ENCODE_CHAR (ch, charset);
 	  if (!XCHARSET_FINAL (charset))
 	    {
 	      charset = Vcharset_ascii;
-	      byte1 = '~';
+	      code_point = '~';
 	    }
 	found:
 	  Vdefault_coded_charset_priority_list
@@ -5309,11 +5304,22 @@ char_encode_iso2022 (struct encoding_stream *str, Emchar ch,
       switch (XCHARSET_DIMENSION (charset))
 	{
 	case 1:
-	  Dynarr_add (dst, byte1 | charmask);
+	  Dynarr_add (dst, (code_point & 0xFF) | charmask);
 	  break;
 	case 2:
-	  Dynarr_add (dst, byte1 | charmask);
-	  Dynarr_add (dst, byte2 | charmask);
+	  Dynarr_add (dst, ((code_point >> 8) & 0xFF) | charmask);
+	  Dynarr_add (dst, ( code_point       & 0xFF) | charmask);
+	  break;
+	case 3:
+	  Dynarr_add (dst, ((code_point >> 16) & 0xFF) | charmask);
+	  Dynarr_add (dst, ((code_point >>  8) & 0xFF) | charmask);
+	  Dynarr_add (dst, ( code_point        & 0xFF) | charmask);
+	  break;
+	case 4:
+	  Dynarr_add (dst, ((code_point >> 24) & 0xFF) | charmask);
+	  Dynarr_add (dst, ((code_point >> 16) & 0xFF) | charmask);
+	  Dynarr_add (dst, ((code_point >>  8) & 0xFF) | charmask);
+	  Dynarr_add (dst, ( code_point        & 0xFF) | charmask);
 	  break;
 	default:
 	  abort ();
@@ -5355,7 +5361,7 @@ decode_coding_no_conversion (Lstream *decoding, const unsigned char *src,
   unsigned char c;
   struct decoding_stream *str = DECODING_STREAM_DATA (decoding);
   unsigned int flags  = str->flags;
-  unsigned int ch     = str->ch;
+  unsigned int cpos   = str->cpos;
   eol_type_t eol_type = str->eol_type;
 
   while (n--)
@@ -5367,10 +5373,10 @@ decode_coding_no_conversion (Lstream *decoding, const unsigned char *src,
     label_continue_loop:;
     }
 
-  DECODE_HANDLE_END_OF_CONVERSION (flags, ch, dst);
+  DECODE_HANDLE_END_OF_CONVERSION (flags, cpos, dst);
 
   str->flags = flags;
-  str->ch    = ch;
+  str->cpos  = cpos;
 }
 
 static void
-- 
1.7.10.4