X-Git-Url: http://git.chise.org/gitweb/?a=blobdiff_plain;f=src%2Ftext-coding.c;h=187d2a90e82e815323ee76494f8f47ac7044e259;hb=a699139716d7a947173ebc9a7130cc0eead5da7f;hp=073a4181651d0ace2e90190c5dee6db4913a5638;hpb=88368240a2bfbadac25b85a1ed4b5c783957ceee;p=chise%2Fxemacs-chise.git- diff --git a/src/text-coding.c b/src/text-coding.c index 073a418..187d2a9 100644 --- a/src/text-coding.c +++ b/src/text-coding.c @@ -2260,7 +2260,129 @@ struct decoding_stream }; #ifdef UTF2000 -extern Lisp_Object Vcharacter_composition_table; +extern Lisp_Object Qcomposition; + +INLINE_HEADER void +decode_flush_er_chars (struct decoding_stream *str, unsigned_char_dynarr* dst); +INLINE_HEADER void +decode_flush_er_chars (struct decoding_stream *str, unsigned_char_dynarr* dst) +{ + if ( str->er_counter > 0) + { + Dynarr_add_many (dst, str->er_buf, str->er_counter); + str->er_counter = 0; + } +} + +void decode_add_er_char (struct decoding_stream *str, Emchar character, + unsigned_char_dynarr* dst); +void +decode_add_er_char (struct decoding_stream *str, Emchar c, + unsigned_char_dynarr* dst) +{ + if (str->er_counter == 0) + { + if (CODING_SYSTEM_USE_ENTITY_REFERENCE (str->codesys) + && (c == '&') ) + { + str->er_buf[0] = '&'; + str->er_counter++; + } + else + DECODE_ADD_UCS_CHAR (c, dst); + } + else if (c == ';') + { + Lisp_Object string = make_string (str->er_buf, + str->er_counter); + Lisp_Object rest = Vcoded_charset_entity_reference_alist; + Lisp_Object cell; + Lisp_Object ret; + Lisp_Object pat; + Lisp_Object ccs; + int base; + + while (!NILP (rest)) + { + cell = Fcar (rest); + ccs = Fcar (cell); + if (NILP (ccs = Ffind_charset (ccs))) + continue; + + cell = Fcdr (cell); + ret = Fcar (cell); + if (STRINGP (ret)) + pat = ret; + else + continue; + + cell = Fcdr (cell); + cell = Fcdr (cell); + ret = Fcar (cell); + if (EQ (ret, Qd)) + { + pat = concat3 (build_string ("^&"), + pat, build_string ("\\([0-9]+\\)$")); + base = 10; + } + else if (EQ (ret, Qx)) + { + pat = concat3 (build_string ("^&"), + pat, build_string ("\\([0-9a-f]+\\)$")); + base = 16; + } + else if (EQ (ret, QX)) + { + pat = concat3 (build_string ("^&"), + pat, build_string ("\\([0-9A-F]+\\)$")); + base = 16; + } + else + continue; + + if (!NILP (Fstring_match (pat, string, Qnil, Qnil))) + { + int code + = XINT (Fstring_to_number + (Fsubstring (string, + Fmatch_beginning (make_int (1)), + Fmatch_end (make_int (1))), + make_int (base))); + + DECODE_ADD_UCS_CHAR (DECODE_CHAR (ccs, code), dst); + goto decoded; + } + rest = Fcdr (rest); + } + if (!NILP (Fstring_match (build_string ("^&MCS-\\([0-9A-F]+\\)$"), + string, Qnil, Qnil))) + { + int code + = XINT (Fstring_to_number + (Fsubstring (string, + Fmatch_beginning (make_int (1)), + Fmatch_end (make_int (1))), + make_int (16))); + + DECODE_ADD_UCS_CHAR (code, dst); + } + else + { + Dynarr_add_many (dst, str->er_buf, str->er_counter); + Dynarr_add (dst, ';'); + } + decoded: + str->er_counter = 0; + } + else if ( (str->er_counter >= 16) || (c >= 0x7F) ) + { + Dynarr_add_many (dst, str->er_buf, str->er_counter); + str->er_counter = 0; + DECODE_ADD_UCS_CHAR (c, dst); + } + else + str->er_buf[str->er_counter++] = c; +} INLINE_HEADER void COMPOSE_FLUSH_CHARS (struct decoding_stream *str, unsigned_char_dynarr* dst); @@ -2270,27 +2392,26 @@ COMPOSE_FLUSH_CHARS (struct decoding_stream *str, unsigned_char_dynarr* dst) unsigned i; for (i = 0; i < str->combined_char_count; i++) - DECODE_ADD_UCS_CHAR (str->combined_chars[i], dst); + decode_add_er_char (str, str->combined_chars[i], dst); str->combined_char_count = 0; str->combining_table = Qnil; } -void COMPOSE_ADD_CHAR(struct decoding_stream *str, Emchar character, - unsigned_char_dynarr* dst); +void COMPOSE_ADD_CHAR (struct decoding_stream *str, Emchar character, + unsigned_char_dynarr* dst); void -COMPOSE_ADD_CHAR(struct decoding_stream *str, - Emchar character, unsigned_char_dynarr* dst) +COMPOSE_ADD_CHAR (struct decoding_stream *str, + Emchar character, unsigned_char_dynarr* dst) { if (CODING_SYSTEM_DISABLE_COMPOSITION (str->codesys)) - DECODE_ADD_UCS_CHAR (character, dst); - else if (!CHAR_TABLEP (str->combining_table)) + decode_add_er_char (str, character, dst); + else if (!CONSP (str->combining_table)) { Lisp_Object ret - = get_char_id_table (XCHAR_TABLE(Vcharacter_composition_table), - character); + = Fget_char_attribute (make_char (character), Qcomposition, Qnil); if (NILP (ret)) - DECODE_ADD_UCS_CHAR (character, dst); + decode_add_er_char (str, character, dst); else { str->combined_chars[0] = character; @@ -2300,19 +2421,15 @@ COMPOSE_ADD_CHAR(struct decoding_stream *str, } else { - Lisp_Object ret - = get_char_id_table (XCHAR_TABLE(str->combining_table), - character); + Lisp_Object ret = Fcdr (Fassq (make_char (character), str->combining_table)); if (CHARP (ret)) { Emchar char2 = XCHARVAL (ret); - ret = - get_char_id_table (XCHAR_TABLE(Vcharacter_composition_table), - char2); + ret = Fget_char_attribute (make_char (character), Qcomposition, Qnil); if (NILP (ret)) { - DECODE_ADD_UCS_CHAR (char2, dst); + decode_add_er_char (str, character, dst); str->combined_char_count = 0; str->combining_table = Qnil; } @@ -2323,15 +2440,10 @@ COMPOSE_ADD_CHAR(struct decoding_stream *str, str->combining_table = ret; } } - else if (CHAR_TABLEP (ret)) - { - str->combined_chars[str->combined_char_count++] = character; - str->combining_table = ret; - } else { COMPOSE_FLUSH_CHARS (str, dst); - DECODE_ADD_UCS_CHAR (character, dst); + decode_add_er_char (str, character, dst); } } } @@ -4034,7 +4146,6 @@ decode_coding_utf8 (Lstream *decoding, const Extbyte *src, unsigned int cpos = str->cpos; eol_type_t eol_type = str->eol_type; unsigned char counter = str->counter; - unsigned char er_counter = str->er_counter; while (n--) { @@ -4043,127 +4154,17 @@ decode_coding_utf8 (Lstream *decoding, const Extbyte *src, { if ( c < ' ' ) { - if ( er_counter > 0) - { - Dynarr_add_many (dst, str->er_buf, er_counter); - er_counter = 0; - } + COMPOSE_FLUSH_CHARS (str, dst); + decode_flush_er_chars (str, dst); DECODE_HANDLE_EOL_TYPE (eol_type, c, flags, dst); DECODE_ADD_UCS_CHAR (c, dst); } else if ( c < 0xC0 ) - { - if (er_counter == 0) - { - if (CODING_SYSTEM_USE_ENTITY_REFERENCE (str->codesys) - && (c == '&') ) - { - str->er_buf[0] = '&'; - er_counter++; - } - else - DECODE_ADD_UCS_CHAR (c, dst); - } - else if (c == ';') - { - Lisp_Object string = make_string (str->er_buf, er_counter); - Lisp_Object rest = Vcoded_charset_entity_reference_alist; - Lisp_Object cell; - Lisp_Object ret; - Lisp_Object pat; - Lisp_Object ccs; - int base; - - while (!NILP (rest)) - { - cell = Fcar (rest); - ccs = Fcar (cell); - if (NILP (ccs = Ffind_charset (ccs))) - continue; - - cell = Fcdr (cell); - ret = Fcar (cell); - if (STRINGP (ret)) - pat = ret; - else - continue; - - cell = Fcdr (cell); - cell = Fcdr (cell); - ret = Fcar (cell); - if (EQ (ret, Qd)) - { - pat = concat3 (build_string ("^&"), - pat, build_string ("\\([0-9]+\\)$")); - base = 10; - } - else if (EQ (ret, Qx)) - { - pat = concat3 (build_string ("^&"), - pat, - build_string ("\\([0-9a-f]+\\)$")); - base = 16; - } - else if (EQ (ret, QX)) - { - pat = concat3 (build_string ("^&"), - pat, - build_string ("\\([0-9A-F]+\\)$")); - base = 16; - } - else - continue; - - if (!NILP (Fstring_match (pat, string, Qnil, Qnil))) - { - int code - = XINT (Fstring_to_number - (Fsubstring (string, - Fmatch_beginning - (make_int (1)), - Fmatch_end (make_int (1))), - make_int (base))); - - DECODE_ADD_UCS_CHAR (DECODE_CHAR (ccs, code), dst); - goto decoded; - } - rest = Fcdr (rest); - } - if (!NILP (Fstring_match - (build_string ("^&MCS-\\([0-9A-F]+\\)$"), - string, Qnil, Qnil))) - { - int code - = XINT (Fstring_to_number - (Fsubstring (string, - Fmatch_beginning - (make_int (1)), - Fmatch_end (make_int (1))), - make_int (16))); - - DECODE_ADD_UCS_CHAR (code, dst); - } - else - { - Dynarr_add_many (dst, str->er_buf, er_counter); - Dynarr_add (dst, ';'); - } - decoded: - er_counter = 0; - } - else if ( (er_counter >= 16) || (c >= 0x7F) ) - { - Dynarr_add_many (dst, str->er_buf, er_counter); - er_counter = 0; - DECODE_ADD_UCS_CHAR (c, dst); - } - else - str->er_buf[er_counter++] = c; - } + /* decode_add_er_char (str, c, dst); */ + COMPOSE_ADD_CHAR (str, c, dst); else { - Dynarr_add_many (dst, str->er_buf, er_counter); - er_counter = 0; + /* decode_flush_er_chars (str, dst); */ if ( c < 0xE0 ) { cpos = c & 0x1f; @@ -4196,7 +4197,8 @@ decode_coding_utf8 (Lstream *decoding, const Extbyte *src, cpos = ( cpos << 6 ) | ( c & 0x3f ); if (counter == 1) { - DECODE_ADD_UCS_CHAR (cpos, dst); + /* DECODE_ADD_UCS_CHAR (cpos, dst); */ + COMPOSE_ADD_CHAR (str, cpos, dst); cpos = 0; counter = 0; } @@ -4205,6 +4207,8 @@ decode_coding_utf8 (Lstream *decoding, const Extbyte *src, } else { + COMPOSE_FLUSH_CHARS (str, dst); + decode_flush_er_chars (str, dst); decode_output_utf8_partial_char (counter, cpos, dst); DECODE_ADD_BINARY_CHAR (c, dst); cpos = 0; @@ -4215,11 +4219,8 @@ decode_coding_utf8 (Lstream *decoding, const Extbyte *src, if (flags & CODING_STATE_END) { - if ( er_counter > 0) - { - Dynarr_add_many (dst, str->er_buf, er_counter); - er_counter = 0; - } + COMPOSE_FLUSH_CHARS (str, dst); + decode_flush_er_chars (str, dst); if (counter > 0) { decode_output_utf8_partial_char (counter, cpos, dst); @@ -4230,7 +4231,6 @@ decode_coding_utf8 (Lstream *decoding, const Extbyte *src, str->flags = flags; str->cpos = cpos; str->counter = counter; - str->er_counter = er_counter; } void @@ -5196,6 +5196,7 @@ decode_coding_iso2022 (Lstream *decoding, const Extbyte *src, case ISO_ESC_LITERAL: COMPOSE_FLUSH_CHARS (str, dst); + decode_flush_er_chars (str, dst); DECODE_ADD_BINARY_CHAR (c, dst); break; @@ -5217,6 +5218,7 @@ decode_coding_iso2022 (Lstream *decoding, const Extbyte *src, /* Output the (possibly invalid) sequence */ int i; COMPOSE_FLUSH_CHARS (str, dst); + decode_flush_er_chars (str, dst); for (i = 0; i < str->iso2022.esc_bytes_index; i++) DECODE_ADD_BINARY_CHAR (str->iso2022.esc_bytes[i], dst); flags &= CODING_STATE_ISO2022_LOCK; @@ -5228,6 +5230,7 @@ decode_coding_iso2022 (Lstream *decoding, const Extbyte *src, escape sequence; it could mess things up anyway. Just add it now. */ COMPOSE_FLUSH_CHARS (str, dst); + decode_flush_er_chars (str, dst); DECODE_ADD_BINARY_CHAR (c, dst); } } @@ -5244,6 +5247,7 @@ decode_coding_iso2022 (Lstream *decoding, const Extbyte *src, if (counter) { COMPOSE_FLUSH_CHARS (str, dst); + decode_flush_er_chars (str, dst); while (counter > 0) { counter--; @@ -5260,12 +5264,14 @@ decode_coding_iso2022 (Lstream *decoding, const Extbyte *src, if (flags & CODING_STATE_SS2) { COMPOSE_FLUSH_CHARS (str, dst); + decode_flush_er_chars (str, dst); DECODE_ADD_BINARY_CHAR (ISO_CODE_SS2, dst); flags &= ~CODING_STATE_SS2; } if (flags & CODING_STATE_SS3) { COMPOSE_FLUSH_CHARS (str, dst); + decode_flush_er_chars (str, dst); DECODE_ADD_BINARY_CHAR (ISO_CODE_SS3, dst); flags &= ~CODING_STATE_SS3; } @@ -5277,6 +5283,7 @@ decode_coding_iso2022 (Lstream *decoding, const Extbyte *src, if (c == '\r') { COMPOSE_FLUSH_CHARS (str, dst); + decode_flush_er_chars (str, dst); if (eol_type == EOL_CR) Dynarr_add (dst, '\n'); else if (eol_type != EOL_CRLF || flags & CODING_STATE_CR) @@ -5300,6 +5307,7 @@ decode_coding_iso2022 (Lstream *decoding, const Extbyte *src, if (!parse_iso2022_esc (coding_system, &str->iso2022, c, &flags, 1)) { COMPOSE_FLUSH_CHARS (str, dst); + decode_flush_er_chars (str, dst); DECODE_ADD_BINARY_CHAR (c, dst); } } @@ -5315,6 +5323,7 @@ decode_coding_iso2022 (Lstream *decoding, const Extbyte *src, if (c == '\r') { COMPOSE_FLUSH_CHARS (str, dst); + decode_flush_er_chars (str, dst); if (eol_type == EOL_CR) Dynarr_add (dst, '\n'); else if (eol_type != EOL_CRLF || flags & CODING_STATE_CR) @@ -5351,6 +5360,7 @@ decode_coding_iso2022 (Lstream *decoding, const Extbyte *src, to preserve it for the output. */ { COMPOSE_FLUSH_CHARS (str, dst); + decode_flush_er_chars (str, dst); while (counter > 0) { counter--; @@ -5454,6 +5464,7 @@ decode_coding_iso2022 (Lstream *decoding, const Extbyte *src, if (flags & CODING_STATE_END) { COMPOSE_FLUSH_CHARS (str, dst); + decode_flush_er_chars (str, dst); DECODE_OUTPUT_PARTIAL_CHAR (cpos); } str->flags = flags;