From 2f0d94b411571ceb4e7608f7f09694856829c555 Mon Sep 17 00:00:00 2001 From: tomo Date: Wed, 16 Jun 1999 14:18:37 +0000 Subject: [PATCH] (decode_coding_iso2022): Code-point arguments of `MAKE_CHAR' must be smaller than 0x80 in UTF2000. (encode_coding_iso2022): New implementation for UTF2000. --- src/file-coding.c | 239 ++++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 228 insertions(+), 11 deletions(-) diff --git a/src/file-coding.c b/src/file-coding.c index 640fb0c..9e9bb5c 100644 --- a/src/file-coding.c +++ b/src/file-coding.c @@ -4837,7 +4837,7 @@ decode_coding_iso2022 (Lstream *decoding, CONST unsigned char *src, case 2: /* one-byte official */ DECODE_OUTPUT_PARTIAL_CHAR (ch); #ifdef UTF2000 - DECODE_ADD_UCS_CHAR(MAKE_CHAR(charset, c, 0), dst); + DECODE_ADD_UCS_CHAR(MAKE_CHAR(charset, c & 0x7F, 0), dst); #else Dynarr_add (dst, lb); Dynarr_add (dst, c | 0x80); @@ -4849,7 +4849,8 @@ decode_coding_iso2022 (Lstream *decoding, CONST unsigned char *src, { DECODE_OUTPUT_PARTIAL_CHAR (ch); #ifdef UTF2000 - DECODE_ADD_UCS_CHAR(MAKE_CHAR(charset, c, 0), dst); + DECODE_ADD_UCS_CHAR(MAKE_CHAR(charset, c & 0x7F, 0), + dst); #else Dynarr_add (dst, PRE_LEADING_BYTE_PRIVATE_1); Dynarr_add (dst, lb); @@ -4861,7 +4862,9 @@ decode_coding_iso2022 (Lstream *decoding, CONST unsigned char *src, if (ch) { #ifdef UTF2000 - DECODE_ADD_UCS_CHAR(MAKE_CHAR(charset, ch, c), dst); + DECODE_ADD_UCS_CHAR(MAKE_CHAR(charset, + ch & 0x7F, + c & 0x7F), dst); #else Dynarr_add (dst, lb); Dynarr_add (dst, ch | 0x80); @@ -4878,7 +4881,9 @@ decode_coding_iso2022 (Lstream *decoding, CONST unsigned char *src, if (ch) { #ifdef UTF2000 - DECODE_ADD_UCS_CHAR(MAKE_CHAR(charset, ch, c), dst); + DECODE_ADD_UCS_CHAR(MAKE_CHAR(charset, + ch & 0x7F, + c & 0x7F), dst); #else Dynarr_add (dst, PRE_LEADING_BYTE_PRIVATE_2); Dynarr_add (dst, lb); @@ -5007,12 +5012,15 @@ encode_coding_iso2022 (Lstream *encoding, CONST unsigned char *src, unsigned char char_boundary; struct encoding_stream *str = ENCODING_STREAM_DATA (encoding); unsigned int flags = str->flags; - unsigned int ch = str->ch; + Emchar ch = str->ch; Lisp_Coding_System *codesys = str->codesys; eol_type_t eol_type = CODING_SYSTEM_EOL_TYPE (str->codesys); int i; Lisp_Object charset; int half; +#ifdef UTF2000 + unsigned int byte1, byte2; +#endif #ifdef ENABLE_COMPOSITE_CHARS /* flags for handling composite chars. We do a little switcharoo @@ -5029,6 +5037,216 @@ encode_coding_iso2022 (Lstream *encoding, CONST unsigned char *src, #ifdef ENABLE_COMPOSITE_CHARS back_to_square_n: #endif +#ifdef UTF2000 + while (n--) + { + c = *src++; + + switch (char_boundary) + { + case 0: + if ( c >= 0xfc ) + { + ch = c & 0x01; + char_boundary = 5; + } + else if ( c >= 0xf8 ) + { + ch = c & 0x03; + char_boundary = 4; + } + else if ( c >= 0xf0 ) + { + ch = c & 0x07; + char_boundary = 3; + } + else if ( c >= 0xe0 ) + { + ch = c & 0x0f; + char_boundary = 2; + } + else if ( c >= 0xc0 ) + { + ch = c & 0x1f; + char_boundary = 1; + } + else + { + ch = 0; + + restore_left_to_right_direction (codesys, dst, &flags, 0); + + /* Make sure G0 contains ASCII */ + if ((c > ' ' && c < ISO_CODE_DEL) || + !CODING_SYSTEM_ISO2022_NO_ASCII_CNTL (codesys)) + { + ensure_normal_shift (str, dst); + iso2022_designate (Vcharset_ascii, 0, str, dst); + } + + /* If necessary, restore everything to the default state + at end-of-line */ + if (c == '\n' && + !(CODING_SYSTEM_ISO2022_NO_ASCII_EOL (codesys))) + { + restore_left_to_right_direction (codesys, dst, &flags, 0); + + ensure_normal_shift (str, dst); + + for (i = 0; i < 4; i++) + { + Lisp_Object initial_charset = + CODING_SYSTEM_ISO2022_INITIAL_CHARSET (codesys, i); + iso2022_designate (initial_charset, i, str, dst); + } + } + if (c == '\n') + { + if (eol_type != EOL_LF && eol_type != EOL_AUTODETECT) + Dynarr_add (dst, '\r'); + if (eol_type != EOL_CR) + Dynarr_add (dst, c); + } + else + { + if (CODING_SYSTEM_ISO2022_ESCAPE_QUOTED (codesys) + && fit_to_be_escape_quoted (c)) + Dynarr_add (dst, ISO_CODE_ESC); + Dynarr_add (dst, c); + } + char_boundary = 0; + } + break; + case 1: + ch = ( ch << 6 ) | ( c & 0x3f ); + + char_boundary = 0; + if ( (0x80 <= ch) && (ch <= 0x9f) ) + { + charmask = (half == 0 ? 0x00 : 0x80); + + if (CODING_SYSTEM_ISO2022_ESCAPE_QUOTED (codesys) + && fit_to_be_escape_quoted (ch)) + Dynarr_add (dst, ISO_CODE_ESC); + /* you asked for it ... */ + Dynarr_add (dst, ch); + } + else + { + int reg; + + BREAKUP_CHAR (ch, charset, byte1, byte2); + ensure_correct_direction (XCHARSET_DIRECTION (charset), + codesys, dst, &flags, 0); + + /* Now determine which register to use. */ + reg = -1; + for (i = 0; i < 4; i++) + { + if (EQ (charset, str->iso2022.charset[i]) || + EQ (charset, + CODING_SYSTEM_ISO2022_INITIAL_CHARSET (codesys, i))) + { + reg = i; + break; + } + } + + if (reg == -1) + { + if (XCHARSET_GRAPHIC (charset) != 0) + { + if (!NILP (str->iso2022.charset[1]) && + (!CODING_SYSTEM_ISO2022_SEVEN (codesys) || + CODING_SYSTEM_ISO2022_LOCK_SHIFT (codesys))) + reg = 1; + else if (!NILP (str->iso2022.charset[2])) + reg = 2; + else if (!NILP (str->iso2022.charset[3])) + reg = 3; + else + reg = 0; + } + else + reg = 0; + } + + iso2022_designate (charset, reg, str, dst); + + /* Now invoke that register. */ + switch (reg) + { + case 0: + ensure_normal_shift (str, dst); + half = 0; + break; + + case 1: + if (CODING_SYSTEM_ISO2022_SEVEN (codesys)) + { + ensure_shift_out (str, dst); + half = 0; + } + else + half = 1; + break; + + case 2: + if (CODING_SYSTEM_ISO2022_SEVEN (str->codesys)) + { + Dynarr_add (dst, ISO_CODE_ESC); + Dynarr_add (dst, 'N'); + half = 0; + } + else + { + Dynarr_add (dst, ISO_CODE_SS2); + half = 1; + } + break; + + case 3: + if (CODING_SYSTEM_ISO2022_SEVEN (str->codesys)) + { + Dynarr_add (dst, ISO_CODE_ESC); + Dynarr_add (dst, 'O'); + half = 0; + } + else + { + Dynarr_add (dst, ISO_CODE_SS3); + half = 1; + } + break; + + default: + abort (); + } + + charmask = (half == 0 ? 0x00 : 0x80); + + switch (XCHARSET_DIMENSION (charset)) + { + case 1: + Dynarr_add (dst, byte1 | charmask); + break; + case 2: + Dynarr_add (dst, byte1 | charmask); + Dynarr_add (dst, byte2 | charmask); + break; + default: + abort (); + } + } + ch =0; + break; + default: + ch = ( ch << 6 ) | ( c & 0x3f ); + char_boundary--; + } + } +#else /* not UTF2000 */ + while (n--) { c = *src++; @@ -5080,11 +5298,6 @@ encode_coding_iso2022 (Lstream *encoding, CONST unsigned char *src, char_boundary = 1; } -#ifdef UTF2000 - else if (BUFBYTE_FIRST_BYTE_P (c)) - { - } -#else else if (BUFBYTE_LEADING_BYTE_P (c) || BUFBYTE_LEADING_BYTE_P (ch)) { /* Processing Leading Byte */ ch = 0; @@ -5188,7 +5401,6 @@ encode_coding_iso2022 (Lstream *encoding, CONST unsigned char *src, } char_boundary = 0; } -#endif /* not UTF2000 */ else { /* Processing Non-ASCII character */ charmask = (half == 0 ? 0x7F : 0xFF); @@ -5272,6 +5484,7 @@ encode_coding_iso2022 (Lstream *encoding, CONST unsigned char *src, } } } +#endif /* not UTF2000 */ #ifdef ENABLE_COMPOSITE_CHARS if (in_composite) @@ -5285,7 +5498,11 @@ encode_coding_iso2022 (Lstream *encoding, CONST unsigned char *src, } #endif /* ENABLE_COMPOSITE_CHARS */ +#ifdef UTF2000 + if ( (char_boundary == 0) && flags & CODING_STATE_END) +#else if (char_boundary && flags & CODING_STATE_END) +#endif { restore_left_to_right_direction (codesys, dst, &flags, 0); ensure_normal_shift (str, dst); -- 1.7.10.4