Sync with r21-2-19-utf-2000-0_4-1. r21-4-10-kanon-0_4-1
author tomo <tomo>
Sat, 14 Dec 2002 16:48:54 +0000 (16:48 +0000)
committer tomo <tomo>
Sat, 14 Dec 2002 16:48:54 +0000 (16:48 +0000)
src/ChangeLog
src/file-coding.c
src/mule-charset.c

index 631902a..0f13e20 100644 (file)
@@ -1,3 +1,13 @@
+1999-08-25  MORIOKA Tomohiko  <tomo@etl.go.jp>
+
+       * mule-charset.c (syms_of_mule_charset): Update to
+       0.4 (Shin-Imamiya).
+
+1999-07-13 Daiki Ueno <ueno@ueda.info.waseda.ac.jp>
+
+       * file-coding.c (encode_coding_sjis): New implementation for
+       UTF2000.  (decode_coding_sjis): Ditto.
+
 1999-06-17  MORIOKA Tomohiko  <tomo@etl.go.jp>
 
        * mule-charset.c, character.h (Bytecount rep_bytes_by_first_byte):
index 1127899..c918dcd 100644 (file)
@@ -3087,10 +3087,16 @@ decode_coding_sjis (Lstream *decoding, const Extbyte *src,
            {
              unsigned char e1, e2;
 
-             Dynarr_add (dst, LEADING_BYTE_JAPANESE_JISX0208);
              DECODE_SJIS (ch, c, e1, e2);
+#ifdef UTF2000
+             DECODE_ADD_UCS_CHAR(MAKE_CHAR(Vcharset_japanese_jisx0208,
+                                           e1 & 0x7F,
+                                           e2 & 0x7F), dst);
+#else
+             Dynarr_add (dst, LEADING_BYTE_JAPANESE_JISX0208);
              Dynarr_add (dst, e1);
              Dynarr_add (dst, e2);
+#endif
            }
          else
            {
@@ -3106,8 +3112,13 @@ decode_coding_sjis (Lstream *decoding, const Extbyte *src,
            ch = c;
          else if (BYTE_SJIS_KATAKANA_P (c))
            {
+#ifdef UTF2000
+             DECODE_ADD_UCS_CHAR(MAKE_CHAR(Vcharset_katakana_jisx0201,
+                                           c & 0x7F, 0), dst);
+#else
              Dynarr_add (dst, LEADING_BYTE_KATAKANA_JISX0201);
              Dynarr_add (dst, c);
+#endif
            }
          else
            DECODE_ADD_BINARY_CHAR (c, dst);
@@ -3131,10 +3142,82 @@ encode_coding_sjis (Lstream *encoding, const Bufbyte *src,
   unsigned int flags  = str->flags;
   unsigned int ch     = str->ch;
   eol_type_t eol_type = CODING_SYSTEM_EOL_TYPE (str->codesys);
+#ifdef UTF2000
+  unsigned char char_boundary = str->iso2022.current_char_boundary;
+#endif
 
   while (n--)
     {
       Bufbyte c = *src++;
+#ifdef UTF2000
+      switch (char_boundary)
+       {
+       case 0:
+         if ( c >= 0xfc )
+           {
+             ch = c & 0x01;
+             char_boundary = 5;
+           }
+         else if ( c >= 0xf8 )
+           {
+             ch = c & 0x03;
+             char_boundary = 4;
+           }
+         else if ( c >= 0xf0 )
+           {
+             ch = c & 0x07;
+             char_boundary = 3;
+           }
+         else if ( c >= 0xe0 )
+           {
+             ch = c & 0x0f;
+             char_boundary = 2;
+           }
+         else if ( c >= 0xc0 )
+           {
+             ch = c & 0x1f;
+             char_boundary = 1;
+           }
+         else
+           {
+             ch = 0;
+             if (c == '\n')
+               {
+                 if (eol_type != EOL_LF && eol_type != EOL_AUTODETECT)
+                   Dynarr_add (dst, '\r');
+                 if (eol_type != EOL_CR)
+                   Dynarr_add (dst, c);
+               }
+             else
+               Dynarr_add (dst, c);
+             char_boundary = 0;
+           }
+         break;
+       case 1:
+         ch = ( ch << 6 ) | ( c & 0x3f );
+         {
+           Lisp_Object charset;
+           unsigned int c1, c2, s1, s2;
+           
+           BREAKUP_CHAR (ch, charset, c1, c2);
+           if (EQ(charset, Vcharset_katakana_jisx0201))
+             {
+               Dynarr_add (dst, c1 | 0x80);
+             }
+           else if (EQ(charset, Vcharset_japanese_jisx0208))
+             {
+               ENCODE_SJIS (c1 | 0x80, c2 | 0x80, s1, s2);
+               Dynarr_add (dst, s1);
+               Dynarr_add (dst, s2);
+             }
+         }
+         char_boundary = 0;
+         break;
+       default:
+         ch = ( ch << 6 ) | ( c & 0x3f );
+         char_boundary--;
+       }
+#else
       if (c == '\n')
        {
          if (eol_type != EOL_LF && eol_type != EOL_AUTODETECT)
@@ -3171,10 +3254,14 @@ encode_coding_sjis (Lstream *encoding, const Bufbyte *src,
              ch = 0;
            }
        }
+#endif
     }
 
   str->flags = flags;
   str->ch    = ch;
+#ifdef UTF2000
+  str->iso2022.current_char_boundary = char_boundary;
+#endif
 }
 
 DEFUN ("decode-shift-jis-char", Fdecode_shift_jis_char, 1, 1, 0, /*
index 736280a..6b053e8 100644 (file)
@@ -1414,7 +1414,7 @@ syms_of_mule_charset (void)
   defsymbol (&Qcomposite,              "composite");
 
 #ifdef UTF2000
-  Vutf_2000_version = build_string("0.3 (Imamiya)");
+  Vutf_2000_version = build_string("0.4 (Shin-Imamiya)");
   DEFVAR_LISP ("utf-2000-version", &Vutf_2000_version /*
 Version number of UTF-2000.
 */ );