X-Git-Url: http://git.chise.org/gitweb/?a=blobdiff_plain;f=src%2Ftext-coding.c;h=0c0f694deb2a5bea4c6afff2505678724cad74d4;hb=83ee3c208d8c8a7f0a8e80fbedd22edb0a7a9b0e;hp=6bd8841f1456c310eecf544b7eb50765195e1757;hpb=3447b9eeb45a3c97fd6d839fcad20d901215411b;p=chise%2Fxemacs-chise.git

diff --git a/src/text-coding.c b/src/text-coding.c
index 6bd8841f..0c0f694 100644
--- a/src/text-coding.c
+++ b/src/text-coding.c
@@ -1,7 +1,7 @@
 /* Code conversion functions.
    Copyright (C) 1991, 1995 Free Software Foundation, Inc.
    Copyright (C) 1995 Sun Microsystems, Inc.
-   Copyright (C) 1999,2000,2001 MORIOKA Tomohiko
+   Copyright (C) 1999,2000,2001,2002 MORIOKA Tomohiko
 
 This file is part of XEmacs.
 
@@ -23,6 +23,7 @@ Boston, MA 02111-1307, USA. */
 /* Synched up with: Mule 2.3.   Not in FSF. */
 
 /* Rewritten by Ben Wing <ben@xemacs.org>. */
+/* Rewritten by MORIOKA Tomohiko for XEmacs UTF-2000. */
 
 #include <config.h>
 #include "lisp.h"
@@ -2262,6 +2263,148 @@ struct decoding_stream
 extern Lisp_Object Vcharacter_composition_table;
 
 INLINE_HEADER void
+decode_flush_er_chars (struct decoding_stream *str, unsigned_char_dynarr* dst);
+
+/* Write any bytes pending in STR->er_buf to DST unchanged and empty the
+   buffer.  Does nothing when STR->er_counter is 0.  */
+INLINE_HEADER void
+decode_flush_er_chars (struct decoding_stream *str, unsigned_char_dynarr* dst)
+{
+  if ( str->er_counter > 0)
+    {
+      Dynarr_add_many (dst, str->er_buf, str->er_counter);
+      str->er_counter = 0;
+    }
+}
+
+void decode_add_er_char(struct decoding_stream *str, Emchar character,
+                        unsigned_char_dynarr* dst);
+
+/* Feed character C through the entity-reference recognizer of STR,
+   writing output to DST.
+
+   While nothing is pending (STR->er_counter == 0), an '&' starts a
+   candidate if the coding system uses entity references; any other
+   character is passed straight to DECODE_ADD_UCS_CHAR.  While a
+   candidate is pending, ';' ends it: the buffered text is matched
+   against each entry of Vcoded_charset_entity_reference_alist, then
+   against the "&MCS-<uppercase hex>" form, and the decoded character is
+   emitted; if nothing matches, the buffered text and the ';' are
+   emitted as they are.  A candidate is abandoned (flushed unchanged,
+   followed by C) once 16 bytes are buffered or C >= 0x7F; otherwise C
+   is appended to the candidate.  */
+void
+decode_add_er_char(struct decoding_stream *str, Emchar c,
+                   unsigned_char_dynarr* dst)
+{
+  if (str->er_counter == 0)
+    {
+      if (CODING_SYSTEM_USE_ENTITY_REFERENCE (str->codesys)
+          && (c == '&') )
+        {
+          str->er_buf[0] = '&';
+          str->er_counter++;
+        }
+      else
+        DECODE_ADD_UCS_CHAR (c, dst);
+    }
+  else if (c == ';')
+    {
+      Lisp_Object string = make_string (str->er_buf,
+                                        str->er_counter);
+      Lisp_Object rest = Vcoded_charset_entity_reference_alist;
+      Lisp_Object cell;
+      Lisp_Object ret;
+      Lisp_Object pat;
+      Lisp_Object ccs;
+      int base;
+
+      /* Advance REST in the loop header so that every `continue' below
+         moves on to the next entry; skipping an unusable entry without
+         advancing would spin on that entry forever.  */
+      for (; !NILP (rest); rest = Fcdr (rest))
+        {
+          cell = Fcar (rest);
+          ccs = Fcar (cell);
+          if (NILP (ccs = Ffind_charset (ccs)))
+            continue;
+
+          /* Second element: pattern text placed between "&" and the
+             number; must be a string.  */
+          cell = Fcdr (cell);
+          ret = Fcar (cell);
+          if (STRINGP (ret))
+            pat = ret;
+          else
+            continue;
+
+          /* Fourth element: number format symbol (d, x or X).  */
+          cell = Fcdr (cell);
+          cell = Fcdr (cell);
+          ret = Fcar (cell);
+          if (EQ (ret, Qd))
+            {
+              pat = concat3 (build_string ("^&"),
+                             pat, build_string ("\\([0-9]+\\)$"));
+              base = 10;
+            }
+          else if (EQ (ret, Qx))
+            {
+              pat = concat3 (build_string ("^&"),
+                             pat, build_string ("\\([0-9a-f]+\\)$"));
+              base = 16;
+            }
+          else if (EQ (ret, QX))
+            {
+              pat = concat3 (build_string ("^&"),
+                             pat, build_string ("\\([0-9A-F]+\\)$"));
+              base = 16;
+            }
+          else
+            continue;
+
+          if (!NILP (Fstring_match (pat, string, Qnil, Qnil)))
+            {
+              int code
+                = XINT (Fstring_to_number
+                        (Fsubstring (string,
+                                     Fmatch_beginning (make_int (1)),
+                                     Fmatch_end (make_int (1))),
+                         make_int (base)));
+
+              DECODE_ADD_UCS_CHAR (DECODE_CHAR (ccs, code), dst);
+              goto decoded;
+            }
+        }
+      if (!NILP (Fstring_match (build_string ("^&MCS-\\([0-9A-F]+\\)$"),
+                                string, Qnil, Qnil)))
+        {
+          int code
+            = XINT (Fstring_to_number
+                    (Fsubstring (string,
+                                 Fmatch_beginning (make_int (1)),
+                                 Fmatch_end (make_int (1))),
+                     make_int (16)));
+
+          DECODE_ADD_UCS_CHAR (code, dst);
+        }
+      else
+        {
+          Dynarr_add_many (dst, str->er_buf, str->er_counter);
+          Dynarr_add (dst, ';');
+        }
+    decoded:
+      str->er_counter = 0;
+    }
+  else if ( (str->er_counter >= 16) || (c >= 0x7F) )
+    {
+      Dynarr_add_many (dst, str->er_buf, str->er_counter);
+      str->er_counter = 0;
+      DECODE_ADD_UCS_CHAR (c, dst);
+    }
+  else
+    str->er_buf[str->er_counter++] = c;
+}
+
+INLINE_HEADER void
 COMPOSE_FLUSH_CHARS (struct decoding_stream *str, unsigned_char_dynarr* dst);
 INLINE_HEADER void
 COMPOSE_FLUSH_CHARS (struct decoding_stream *str, unsigned_char_dynarr* dst)
@@ -4033,133 +4176,23 @@ decode_coding_utf8 (Lstream *decoding, const Extbyte *src,
   unsigned int cpos = str->cpos;
   eol_type_t eol_type = str->eol_type;
   unsigned char counter = str->counter;
-  unsigned char er_counter = str->er_counter;
 
   while (n--)
     {
       unsigned char c = *(unsigned char *)src++;
       if (counter == 0)
         {
-          if ( c < 0xC0 )
+          if ( c < ' ' )
             {
-              if (!CODING_SYSTEM_USE_ENTITY_REFERENCE (str->codesys))
-                {
-                  DECODE_HANDLE_EOL_TYPE (eol_type, c, flags, dst);
-                  DECODE_ADD_UCS_CHAR (c, dst);
-                }
-              else if (er_counter == 0)
-                {
-                  if (c == '&')
-                    {
-                      str->er_buf[0] = '&';
-                      er_counter++;
-                    }
-                  else
-                    {
-                      DECODE_HANDLE_EOL_TYPE (eol_type, c, flags, dst);
-                      DECODE_ADD_UCS_CHAR (c, dst);
-                    }
-                }
-              else if (c == ';')
-                {
-                  Lisp_Object string = make_string (str->er_buf, er_counter);
-                  Lisp_Object rest = Vcoded_charset_entity_reference_alist;
-                  Lisp_Object cell;
-                  Lisp_Object ret;
-                  Lisp_Object pat;
-                  Lisp_Object ccs;
-                  int base;
-
-                  while (!NILP (rest))
-                    {
-                      cell = Fcar (rest);
-                      ccs = Fcar (cell);
-                      if (NILP (ccs = Ffind_charset (ccs)))
-                        continue;
-
-                      cell = Fcdr (cell);
-                      ret = Fcar (cell);
-                      if (STRINGP (ret))
-                        pat = ret;
-                      else
-                        continue;
-
-                      cell = Fcdr (cell);
-                      cell = Fcdr (cell);
-                      ret = Fcar (cell);
-                      if (EQ (ret, Qd))
-                        {
-                          pat = concat3 (build_string ("^&"),
-                                         pat, build_string ("\\([0-9]+\\)$"));
-                          base = 10;
-                        }
-                      else if (EQ (ret, Qx))
-                        {
-                          pat = concat3 (build_string ("^&"),
-                                         pat,
-                                         build_string ("\\([0-9a-f]+\\)$"));
-                          base = 16;
-                        }
-                      else if (EQ (ret, QX))
-                        {
-                          pat = concat3 (build_string ("^&"),
-                                         pat,
-                                         build_string ("\\([0-9A-F]+\\)$"));
-                          base = 16;
-                        }
-                      else
-                        continue;
-
-                      if (!NILP (Fstring_match (pat, string, Qnil, Qnil)))
-                        {
-                          int code
-                            = XINT (Fstring_to_number
-                                    (Fsubstring (string,
-                                                 Fmatch_beginning
-                                                 (make_int (1)),
-                                                 Fmatch_end (make_int (1))),
-                                     make_int (base)));
-
-                          DECODE_ADD_UCS_CHAR (DECODE_CHAR (ccs, code), dst);
-                          goto decoded;
-                        }
-                      rest = Fcdr (rest);
-                    }
-                  if (!NILP (Fstring_match
-                             (build_string ("^&MCS-\\([0-9A-F]+\\)$"),
-                              string, Qnil, Qnil)))
-                    {
-                      int code
-                        = XINT (Fstring_to_number
-                                (Fsubstring (string,
-                                             Fmatch_beginning
-                                             (make_int (1)),
-                                             Fmatch_end (make_int (1))),
-                                 make_int (16)));
-
-                      DECODE_ADD_UCS_CHAR (code, dst);
-                    }
-                  else
-                    {
-                      Dynarr_add_many (dst, str->er_buf, er_counter);
-                      Dynarr_add (dst, ';');
-                    }
-                decoded:
-                  er_counter = 0;
-                }
-              else if ( (er_counter >= 16) || (c <= ' ') || (c >= 0x7F) )
-                {
-                  Dynarr_add_many (dst, str->er_buf, er_counter);
-                  er_counter = 0;
-                  DECODE_ADD_UCS_CHAR (c, dst);
-                }
-              else
-                str->er_buf[er_counter++] = c;
+              decode_flush_er_chars (str, dst);
+              DECODE_HANDLE_EOL_TYPE (eol_type, c, flags, dst);
+              DECODE_ADD_UCS_CHAR (c, dst);
             }
+          else if ( c < 0xC0 )
+            decode_add_er_char (str, c, dst);
           else
             {
-              Dynarr_add_many (dst, str->er_buf, er_counter);
-              er_counter = 0;
+              decode_flush_er_chars (str, dst);
               if ( c < 0xE0 )
                 {
                   cpos = c & 0x1f;
@@ -4211,22 +4244,17 @@ decode_coding_utf8 (Lstream *decoding, const Extbyte *src,
 
   if (flags & CODING_STATE_END)
     {
+      decode_flush_er_chars (str, dst);
       if (counter > 0)
         {
           decode_output_utf8_partial_char (counter, cpos, dst);
           cpos = 0;
           counter = 0;
         }
-      else if ( er_counter > 0)
-        {
-          Dynarr_add_many (dst, str->er_buf, er_counter);
-          er_counter = 0;
-        }
     }
   str->flags = flags;
   str->cpos = cpos;
   str->counter = counter;
-  str->er_counter = er_counter;
 }
 
 void