(decode_coding_iso2022): Code-point arguments of `MAKE_CHAR' must be
authortomo <tomo>
Wed, 16 Jun 1999 14:18:37 +0000 (14:18 +0000)
committertomo <tomo>
Wed, 16 Jun 1999 14:18:37 +0000 (14:18 +0000)
smaller than 0x80 in UTF2000.
(encode_coding_iso2022): New implementation for UTF2000.

src/file-coding.c

index 640fb0c..9e9bb5c 100644 (file)
@@ -4837,7 +4837,7 @@ decode_coding_iso2022 (Lstream *decoding, CONST unsigned char *src,
                case 2: /* one-byte official */
                  DECODE_OUTPUT_PARTIAL_CHAR (ch);
 #ifdef UTF2000
-                 DECODE_ADD_UCS_CHAR(MAKE_CHAR(charset, c, 0), dst);
+                 DECODE_ADD_UCS_CHAR(MAKE_CHAR(charset, c & 0x7F, 0), dst);
 #else
                  Dynarr_add (dst, lb);
                  Dynarr_add (dst, c | 0x80);
@@ -4849,7 +4849,8 @@ decode_coding_iso2022 (Lstream *decoding, CONST unsigned char *src,
                    {
                      DECODE_OUTPUT_PARTIAL_CHAR (ch);
 #ifdef UTF2000
-                     DECODE_ADD_UCS_CHAR(MAKE_CHAR(charset, c, 0), dst);
+                     DECODE_ADD_UCS_CHAR(MAKE_CHAR(charset, c & 0x7F, 0),
+                                         dst);
 #else
                      Dynarr_add (dst, PRE_LEADING_BYTE_PRIVATE_1);
                      Dynarr_add (dst, lb);
@@ -4861,7 +4862,9 @@ decode_coding_iso2022 (Lstream *decoding, CONST unsigned char *src,
                      if (ch)
                        {
 #ifdef UTF2000
-                         DECODE_ADD_UCS_CHAR(MAKE_CHAR(charset, ch, c), dst);
+                         DECODE_ADD_UCS_CHAR(MAKE_CHAR(charset,
+                                                       ch & 0x7F,
+                                                       c & 0x7F), dst);
 #else
                          Dynarr_add (dst, lb);
                          Dynarr_add (dst, ch | 0x80);
@@ -4878,7 +4881,9 @@ decode_coding_iso2022 (Lstream *decoding, CONST unsigned char *src,
                  if (ch)
                    {
 #ifdef UTF2000
-                     DECODE_ADD_UCS_CHAR(MAKE_CHAR(charset, ch, c), dst);
+                     DECODE_ADD_UCS_CHAR(MAKE_CHAR(charset,
+                                                   ch & 0x7F,
+                                                   c & 0x7F), dst);
 #else
                      Dynarr_add (dst, PRE_LEADING_BYTE_PRIVATE_2);
                      Dynarr_add (dst, lb);
@@ -5007,12 +5012,15 @@ encode_coding_iso2022 (Lstream *encoding, CONST unsigned char *src,
   unsigned char char_boundary;
   struct encoding_stream *str = ENCODING_STREAM_DATA (encoding);
   unsigned int flags          = str->flags;
-  unsigned int ch             = str->ch;
+  Emchar ch                   = str->ch;
   Lisp_Coding_System *codesys = str->codesys;
   eol_type_t eol_type         = CODING_SYSTEM_EOL_TYPE (str->codesys);
   int i;
   Lisp_Object charset;
   int half;
+#ifdef UTF2000
+  unsigned int byte1, byte2;
+#endif
 
 #ifdef ENABLE_COMPOSITE_CHARS
   /* flags for handling composite chars.  We do a little switcharoo
@@ -5029,6 +5037,216 @@ encode_coding_iso2022 (Lstream *encoding, CONST unsigned char *src,
 #ifdef ENABLE_COMPOSITE_CHARS
  back_to_square_n:
 #endif
+#ifdef UTF2000
+  while (n--)
+    {
+      c = *src++;
+
+      switch (char_boundary)
+       {
+       case 0:
+         if ( c >= 0xfc )
+           {
+             ch = c & 0x01;
+             char_boundary = 5;
+           }
+         else if ( c >= 0xf8 )
+           {
+             ch = c & 0x03;
+             char_boundary = 4;
+           }
+         else if ( c >= 0xf0 )
+           {
+             ch = c & 0x07;
+             char_boundary = 3;
+           }
+         else if ( c >= 0xe0 )
+           {
+             ch = c & 0x0f;
+             char_boundary = 2;
+           }
+         else if ( c >= 0xc0 )
+           {
+             ch = c & 0x1f;
+             char_boundary = 1;
+           }
+         else
+           {
+             ch = 0;
+
+             restore_left_to_right_direction (codesys, dst, &flags, 0);
+             
+             /* Make sure G0 contains ASCII */
+             if ((c > ' ' && c < ISO_CODE_DEL) ||
+                 !CODING_SYSTEM_ISO2022_NO_ASCII_CNTL (codesys))
+               {
+                 ensure_normal_shift (str, dst);
+                 iso2022_designate (Vcharset_ascii, 0, str, dst);
+               }
+             
+             /* If necessary, restore everything to the default state
+                at end-of-line */
+             if (c == '\n' &&
+                 !(CODING_SYSTEM_ISO2022_NO_ASCII_EOL (codesys)))
+               {
+                 restore_left_to_right_direction (codesys, dst, &flags, 0);
+
+                 ensure_normal_shift (str, dst);
+
+                 for (i = 0; i < 4; i++)
+                   {
+                     Lisp_Object initial_charset =
+                       CODING_SYSTEM_ISO2022_INITIAL_CHARSET (codesys, i);
+                     iso2022_designate (initial_charset, i, str, dst);
+                   }
+               }
+             if (c == '\n')
+               {
+                 if (eol_type != EOL_LF && eol_type != EOL_AUTODETECT)
+                   Dynarr_add (dst, '\r');
+                 if (eol_type != EOL_CR)
+                   Dynarr_add (dst, c);
+               }
+             else
+               {
+                 if (CODING_SYSTEM_ISO2022_ESCAPE_QUOTED (codesys)
+                     && fit_to_be_escape_quoted (c))
+                   Dynarr_add (dst, ISO_CODE_ESC);
+                 Dynarr_add (dst, c);
+               }
+             char_boundary = 0;
+           }
+         break;
+       case 1:
+         ch = ( ch << 6 ) | ( c & 0x3f );
+         
+         char_boundary = 0;
+         if ( (0x80 <= ch) && (ch <= 0x9f) )
+           {
+             charmask = (half == 0 ? 0x00 : 0x80);
+         
+             if (CODING_SYSTEM_ISO2022_ESCAPE_QUOTED (codesys)
+                 && fit_to_be_escape_quoted (ch))
+               Dynarr_add (dst, ISO_CODE_ESC);
+             /* you asked for it ... */
+             Dynarr_add (dst, ch);
+           }
+         else
+           {
+             int reg;
+
+             BREAKUP_CHAR (ch, charset, byte1, byte2);
+             ensure_correct_direction (XCHARSET_DIRECTION (charset),
+                                       codesys, dst, &flags, 0);
+
+             /* Now determine which register to use. */
+             reg = -1;
+             for (i = 0; i < 4; i++)
+               {
+                 if (EQ (charset, str->iso2022.charset[i]) ||
+                     EQ (charset,
+                         CODING_SYSTEM_ISO2022_INITIAL_CHARSET (codesys, i)))
+                   {
+                     reg = i;
+                     break;
+                   }
+               }
+             
+             if (reg == -1)
+               {
+                 if (XCHARSET_GRAPHIC (charset) != 0)
+                   {
+                     if (!NILP (str->iso2022.charset[1]) &&
+                         (!CODING_SYSTEM_ISO2022_SEVEN (codesys) ||
+                          CODING_SYSTEM_ISO2022_LOCK_SHIFT (codesys)))
+                       reg = 1;
+                     else if (!NILP (str->iso2022.charset[2]))
+                       reg = 2;
+                     else if (!NILP (str->iso2022.charset[3]))
+                       reg = 3;
+                     else
+                       reg = 0;
+                   }
+                 else
+                   reg = 0;
+               }
+             
+             iso2022_designate (charset, reg, str, dst);
+             
+             /* Now invoke that register. */
+             switch (reg)
+               {
+               case 0:
+                 ensure_normal_shift (str, dst);
+                 half = 0;
+                 break;
+                 
+               case 1:
+                 if (CODING_SYSTEM_ISO2022_SEVEN (codesys))
+                   {
+                     ensure_shift_out (str, dst);
+                     half = 0;
+                   }
+                 else
+                   half = 1;
+                 break;
+                 
+               case 2:
+                 if (CODING_SYSTEM_ISO2022_SEVEN (str->codesys))
+                   {
+                     Dynarr_add (dst, ISO_CODE_ESC);
+                     Dynarr_add (dst, 'N');
+                     half = 0;
+                   }
+                 else
+                   {
+                     Dynarr_add (dst, ISO_CODE_SS2);
+                     half = 1;
+                   }
+                 break;
+                 
+               case 3:
+                 if (CODING_SYSTEM_ISO2022_SEVEN (str->codesys))
+                   {
+                     Dynarr_add (dst, ISO_CODE_ESC);
+                     Dynarr_add (dst, 'O');
+                     half = 0;
+                   }
+                 else
+                   {
+                     Dynarr_add (dst, ISO_CODE_SS3);
+                     half = 1;
+                   }
+                 break;
+                 
+               default:
+                 abort ();
+               }
+             
+             charmask = (half == 0 ? 0x00 : 0x80);
+             
+             switch (XCHARSET_DIMENSION (charset))
+               {
+               case 1:
+                 Dynarr_add (dst, byte1 | charmask);
+                 break;
+               case 2:
+                 Dynarr_add (dst, byte1 | charmask);
+                 Dynarr_add (dst, byte2 | charmask);
+                 break;
+               default:
+                 abort ();
+               }
+           }
+         ch =0;
+         break;
+       default:
+         ch = ( ch << 6 ) | ( c & 0x3f );
+         char_boundary--;
+       }
+    }
+#else /* not UTF2000 */
+
   while (n--)
     {
       c = *src++;
@@ -5080,11 +5298,6 @@ encode_coding_iso2022 (Lstream *encoding, CONST unsigned char *src,
          char_boundary = 1;
        }
 
-#ifdef UTF2000
-      else if (BUFBYTE_FIRST_BYTE_P (c))
-       {
-       }
-#else
       else if (BUFBYTE_LEADING_BYTE_P (c) || BUFBYTE_LEADING_BYTE_P (ch))
        { /* Processing Leading Byte */
          ch = 0;
@@ -5188,7 +5401,6 @@ encode_coding_iso2022 (Lstream *encoding, CONST unsigned char *src,
            }
          char_boundary = 0;
        }
-#endif /* not UTF2000 */
       else
        {                       /* Processing Non-ASCII character */
          charmask = (half == 0 ? 0x7F : 0xFF);
@@ -5272,6 +5484,7 @@ encode_coding_iso2022 (Lstream *encoding, CONST unsigned char *src,
            }
        }
     }
+#endif /* not UTF2000 */
 
 #ifdef ENABLE_COMPOSITE_CHARS
   if (in_composite)
@@ -5285,7 +5498,11 @@ encode_coding_iso2022 (Lstream *encoding, CONST unsigned char *src,
     }
 #endif /* ENABLE_COMPOSITE_CHARS */
 
+#ifdef UTF2000
+  if ( (char_boundary == 0) && flags & CODING_STATE_END)
+#else
   if (char_boundary && flags & CODING_STATE_END)
+#endif
     {
       restore_left_to_right_direction (codesys, dst, &flags, 0);
       ensure_normal_shift (str, dst);