From: tomo Date: Sat, 10 Aug 2002 15:12:36 +0000 (+0000) Subject: (char_encode_as_entity_reference): New function. X-Git-Tag: r21-2-44-utf-2000-0_19-nc4-b5-1-2~12 X-Git-Url: http://git.chise.org/gitweb/?a=commitdiff_plain;h=b687548cf76ae4199169cc4a2f717ba57c455c73;p=chise%2Fxemacs-chise.git.1 (char_encode_as_entity_reference): New function. (char_encode_utf8): Use `char_encode_as_entity_reference'. --- diff --git a/src/text-coding.c b/src/text-coding.c index b9f3b20..f7cbe9e 100644 --- a/src/text-coding.c +++ b/src/text-coding.c @@ -2281,199 +2281,6 @@ struct decoding_stream struct detection_state decst; }; -#ifdef UTF2000 -extern Lisp_Object Qcomposition; - -INLINE_HEADER void -decode_flush_er_chars (struct decoding_stream *str, unsigned_char_dynarr* dst); -INLINE_HEADER void -decode_flush_er_chars (struct decoding_stream *str, unsigned_char_dynarr* dst) -{ - if ( str->er_counter > 0) - { - Dynarr_add_many (dst, str->er_buf, str->er_counter); - str->er_counter = 0; - } -} - -void decode_add_er_char (struct decoding_stream *str, Emchar character, - unsigned_char_dynarr* dst); -void -decode_add_er_char (struct decoding_stream *str, Emchar c, - unsigned_char_dynarr* dst) -{ - if (str->er_counter == 0) - { - if (CODING_SYSTEM_USE_ENTITY_REFERENCE (str->codesys) - && (c == '&') ) - { - str->er_buf[0] = '&'; - str->er_counter++; - } - else - DECODE_ADD_UCS_CHAR (c, dst); - } - else if (c == ';') - { - Lisp_Object string = make_string (str->er_buf, - str->er_counter); - Lisp_Object rest = Vcoded_charset_entity_reference_alist; - Lisp_Object cell; - Lisp_Object ret; - Lisp_Object pat; - Lisp_Object ccs; - int base; - - while (!NILP (rest)) - { - cell = Fcar (rest); - ccs = Fcar (cell); - if (NILP (ccs = Ffind_charset (ccs))) - continue; - - cell = Fcdr (cell); - ret = Fcar (cell); - if (STRINGP (ret)) - pat = ret; - else - continue; - - cell = Fcdr (cell); - cell = Fcdr (cell); - ret = Fcar (cell); - if (EQ (ret, Qd)) - { - pat = concat3 (build_string ("^&"), - pat, build_string ("\\([0-9]+\\)$")); - base = 10; - } - else if (EQ (ret, Qx)) - { - pat = concat3 (build_string ("^&"), - pat, build_string ("\\([0-9a-f]+\\)$")); - base = 16; - } - else if (EQ (ret, QX)) - { - pat = concat3 (build_string ("^&"), - pat, build_string ("\\([0-9A-F]+\\)$")); - base = 16; - } - else - continue; - - if (!NILP (Fstring_match (pat, string, Qnil, Qnil))) - { - int code - = XINT (Fstring_to_number - (Fsubstring (string, - Fmatch_beginning (make_int (1)), - Fmatch_end (make_int (1))), - make_int (base))); - - DECODE_ADD_UCS_CHAR (DECODE_CHAR (ccs, code), dst); - goto decoded; - } - rest = Fcdr (rest); - } - if (!NILP (Fstring_match (build_string ("^&MCS-\\([0-9A-F]+\\)$"), - string, Qnil, Qnil))) - { - int code - = XINT (Fstring_to_number - (Fsubstring (string, - Fmatch_beginning (make_int (1)), - Fmatch_end (make_int (1))), - make_int (16))); - - DECODE_ADD_UCS_CHAR (code, dst); - } - else - { - Dynarr_add_many (dst, str->er_buf, str->er_counter); - Dynarr_add (dst, ';'); - } - decoded: - str->er_counter = 0; - } - else if ( (str->er_counter >= 16) || (c >= 0x7F) ) - { - Dynarr_add_many (dst, str->er_buf, str->er_counter); - str->er_counter = 0; - DECODE_ADD_UCS_CHAR (c, dst); - } - else - str->er_buf[str->er_counter++] = c; -} - -INLINE_HEADER void -COMPOSE_FLUSH_CHARS (struct decoding_stream *str, unsigned_char_dynarr* dst); -INLINE_HEADER void -COMPOSE_FLUSH_CHARS (struct decoding_stream *str, unsigned_char_dynarr* dst) -{ - unsigned i; - - for (i = 0; i < str->combined_char_count; i++) - decode_add_er_char (str, str->combined_chars[i], dst); - str->combined_char_count = 0; - str->combining_table = Qnil; -} - -void COMPOSE_ADD_CHAR (struct decoding_stream *str, Emchar character, - unsigned_char_dynarr* dst); -void -COMPOSE_ADD_CHAR (struct decoding_stream *str, - Emchar character, unsigned_char_dynarr* dst) -{ - if (CODING_SYSTEM_DISABLE_COMPOSITION (str->codesys)) - decode_add_er_char (str, character, dst); - else if (!CONSP (str->combining_table)) - { - Lisp_Object ret - = Fget_char_attribute (make_char (character), Qcomposition, Qnil); - - if (NILP (ret)) - decode_add_er_char (str, character, dst); - else - { - str->combined_chars[0] = character; - str->combined_char_count = 1; - str->combining_table = ret; - } - } - else - { - Lisp_Object ret = Fcdr (Fassq (make_char (character), str->combining_table)); - - if (CHARP (ret)) - { - Emchar char2 = XCHARVAL (ret); - ret = Fget_char_attribute (make_char (character), Qcomposition, Qnil); - if (NILP (ret)) - { - decode_add_er_char (str, character, dst); - str->combined_char_count = 0; - str->combining_table = Qnil; - } - else - { - str->combined_chars[0] = char2; - str->combined_char_count = 1; - str->combining_table = ret; - } - } - else - { - COMPOSE_FLUSH_CHARS (str, dst); - decode_add_er_char (str, character, dst); - } - } -} -#else /* not UTF2000 */ -#define COMPOSE_FLUSH_CHARS(str, dst) -#define COMPOSE_ADD_CHAR(str, ch, dst) DECODE_ADD_UCS_CHAR (ch, dst) -#endif /* UTF2000 */ - static ssize_t decoding_reader (Lstream *stream, unsigned char *data, size_t size); static ssize_t decoding_writer (Lstream *stream, @@ -3368,6 +3175,278 @@ text_encode_generic (Lstream *encoding, const Bufbyte *src, } +#ifdef UTF2000 +/************************************************************************/ +/* entity reference */ +/************************************************************************/ + +INLINE_HEADER void +decode_flush_er_chars (struct decoding_stream *str, unsigned_char_dynarr* dst); +INLINE_HEADER void +decode_flush_er_chars (struct decoding_stream *str, unsigned_char_dynarr* dst) +{ + if ( str->er_counter > 0) + { + Dynarr_add_many (dst, str->er_buf, str->er_counter); + str->er_counter = 0; + } +} + +void decode_add_er_char (struct decoding_stream *str, Emchar character, + unsigned_char_dynarr* dst); +void +decode_add_er_char (struct decoding_stream *str, Emchar c, + unsigned_char_dynarr* dst) +{ + if (str->er_counter == 0) + { + if (CODING_SYSTEM_USE_ENTITY_REFERENCE (str->codesys) + && (c == '&') ) + { + str->er_buf[0] = '&'; + str->er_counter++; + } + else + DECODE_ADD_UCS_CHAR (c, dst); + } + else if (c == ';') + { + Lisp_Object string = make_string (str->er_buf, + str->er_counter); + Lisp_Object rest = Vcoded_charset_entity_reference_alist; + Lisp_Object cell; + Lisp_Object ret; + Lisp_Object pat; + Lisp_Object ccs; + int base; + + while (!NILP (rest)) + { + cell = Fcar (rest); + ccs = Fcar (cell); + if (NILP (ccs = Ffind_charset (ccs))) + continue; + + cell = Fcdr (cell); + ret = Fcar (cell); + if (STRINGP (ret)) + pat = ret; + else + continue; + + cell = Fcdr (cell); + cell = Fcdr (cell); + ret = Fcar (cell); + if (EQ (ret, Qd)) + { + pat = concat3 (build_string ("^&"), + pat, build_string ("\\([0-9]+\\)$")); + base = 10; + } + else if (EQ (ret, Qx)) + { + pat = concat3 (build_string ("^&"), + pat, build_string ("\\([0-9a-f]+\\)$")); + base = 16; + } + else if (EQ (ret, QX)) + { + pat = concat3 (build_string ("^&"), + pat, build_string ("\\([0-9A-F]+\\)$")); + base = 16; + } + else + continue; + + if (!NILP (Fstring_match (pat, string, Qnil, Qnil))) + { + int code + = XINT (Fstring_to_number + (Fsubstring (string, + Fmatch_beginning (make_int (1)), + Fmatch_end (make_int (1))), + make_int (base))); + + DECODE_ADD_UCS_CHAR (DECODE_CHAR (ccs, code), dst); + goto decoded; + } + rest = Fcdr (rest); + } + if (!NILP (Fstring_match (build_string ("^&MCS-\\([0-9A-F]+\\)$"), + string, Qnil, Qnil))) + { + int code + = XINT (Fstring_to_number + (Fsubstring (string, + Fmatch_beginning (make_int (1)), + Fmatch_end (make_int (1))), + make_int (16))); + + DECODE_ADD_UCS_CHAR (code, dst); + } + else + { + Dynarr_add_many (dst, str->er_buf, str->er_counter); + Dynarr_add (dst, ';'); + } + decoded: + str->er_counter = 0; + } + else if ( (str->er_counter >= 16) || (c >= 0x7F) ) + { + Dynarr_add_many (dst, str->er_buf, str->er_counter); + str->er_counter = 0; + DECODE_ADD_UCS_CHAR (c, dst); + } + else + str->er_buf[str->er_counter++] = c; +} + +void char_encode_as_entity_reference (Emchar ch, char* buf); +void +char_encode_as_entity_reference (Emchar ch, char* buf) +{ + Lisp_Object rest = Vcoded_charset_entity_reference_alist; + Lisp_Object cell; + Lisp_Object ccs; + int format_columns, idx; + char format[18]; + + while (!NILP (rest)) + { + cell = Fcar (rest); + ccs = Fcar (cell); + if (!NILP (ccs = Ffind_charset (ccs))) + { + int code_point = charset_code_point (ccs, ch); + + if ( code_point >= 0 ) + { + Lisp_Object ret; + + cell = Fcdr (cell); + ret = Fcar (cell); + if (STRINGP (ret) && ((idx = XSTRING_LENGTH (ret)) <= 6)) + { + format[0] = '&'; + strncpy (&format[1], XSTRING_DATA (ret), idx); + idx++; + } + else + continue; + + cell = Fcdr (cell); + ret = Fcar (cell); + if (INTP (ret)) + { + format[idx++] = '%'; + format_columns = XINT (ret); + if ( (2 <= format_columns) && (format_columns <= 8) ) + { + format [idx++] = '0'; + format [idx++] = '0' + format_columns; + } + } + + cell = Fcdr (cell); + ret = Fcar (cell); + if (EQ (ret, Qd)) + format [idx++] = 'd'; + else if (EQ (ret, Qx)) + format [idx++] = 'x'; + else if (EQ (ret, QX)) + format [idx++] = 'X'; + else + continue; + format [idx++] = ';'; + format [idx++] = 0; + + sprintf (buf, format, code_point); + return; + } + } + rest = Fcdr (rest); + } + sprintf (buf, "&MCS-%08X;", ch); +} + + +/************************************************************************/ +/* character composition */ +/************************************************************************/ +extern Lisp_Object Qcomposition; + +INLINE_HEADER void +COMPOSE_FLUSH_CHARS (struct decoding_stream *str, unsigned_char_dynarr* dst); +INLINE_HEADER void +COMPOSE_FLUSH_CHARS (struct decoding_stream *str, unsigned_char_dynarr* dst) +{ + unsigned i; + + for (i = 0; i < str->combined_char_count; i++) + decode_add_er_char (str, str->combined_chars[i], dst); + str->combined_char_count = 0; + str->combining_table = Qnil; +} + +void COMPOSE_ADD_CHAR (struct decoding_stream *str, Emchar character, + unsigned_char_dynarr* dst); +void +COMPOSE_ADD_CHAR (struct decoding_stream *str, + Emchar character, unsigned_char_dynarr* dst) +{ + if (CODING_SYSTEM_DISABLE_COMPOSITION (str->codesys)) + decode_add_er_char (str, character, dst); + else if (!CONSP (str->combining_table)) + { + Lisp_Object ret + = Fget_char_attribute (make_char (character), Qcomposition, Qnil); + + if (NILP (ret)) + decode_add_er_char (str, character, dst); + else + { + str->combined_chars[0] = character; + str->combined_char_count = 1; + str->combining_table = ret; + } + } + else + { + Lisp_Object ret + = Fcdr (Fassq (make_char (character), str->combining_table)); + + if (CHARP (ret)) + { + Emchar char2 = XCHARVAL (ret); + ret = Fget_char_attribute (make_char (character), Qcomposition, + Qnil); + if (NILP (ret)) + { + decode_add_er_char (str, character, dst); + str->combined_char_count = 0; + str->combining_table = Qnil; + } + else + { + str->combined_chars[0] = char2; + str->combined_char_count = 1; + str->combining_table = ret; + } + } + else + { + COMPOSE_FLUSH_CHARS (str, dst); + decode_add_er_char (str, character, dst); + } + } +} +#else /* not UTF2000 */ +#define COMPOSE_FLUSH_CHARS(str, dst) +#define COMPOSE_ADD_CHAR(str, ch, dst) DECODE_ADD_UCS_CHAR (ch, dst) +#endif /* UTF2000 */ + + /************************************************************************/ /* Shift-JIS methods */ /************************************************************************/ @@ -3815,17 +3894,17 @@ decode_coding_big5 (Lstream *decoding, const Extbyte *src, decode_flush_er_chars (str, dst); cpos = c; } - else if ( c >= ' ' ) - { - /* DECODE_ADD_BINARY_CHAR (c, dst); */ - decode_add_er_char (str, c, dst); - } - else + else if ( c < ' ' ) { decode_flush_er_chars (str, dst); DECODE_HANDLE_EOL_TYPE (eol_type, c, flags, dst); DECODE_ADD_BINARY_CHAR (c, dst); } + else + { + /* DECODE_ADD_BINARY_CHAR (c, dst); */ + decode_add_er_char (str, c, dst); + } } label_continue_loop:; } @@ -4323,67 +4402,9 @@ char_encode_utf8 (struct encoding_stream *str, Emchar ch, code_point = XINT (ret); else if (CODING_SYSTEM_USE_ENTITY_REFERENCE (str->codesys)) { - Lisp_Object rest = Vcoded_charset_entity_reference_alist; - Lisp_Object cell; - Lisp_Object ccs; - int format_columns, idx; - char buf[16], format[16]; + char buf[18]; - while (!NILP (rest)) - { - cell = Fcar (rest); - ccs = Fcar (cell); - if (!NILP (ccs = Ffind_charset (ccs))) - { - if ( (code_point - = charset_code_point (ccs, ch)) >= 0 ) - { - cell = Fcdr (cell); - ret = Fcar (cell); - if (STRINGP (ret) - && ((idx =XSTRING_LENGTH (ret)) <= 6)) - { - strncpy (format, XSTRING_DATA (ret), idx); - } - else - continue; - - cell = Fcdr (cell); - ret = Fcar (cell); - if (INTP (ret)) - { - format [idx++] = '%'; - format_columns = XINT (ret); - if ( (2 <= format_columns) - && (format_columns <= 8) ) - { - format [idx++] = '0'; - format [idx++] = '0' + format_columns; - } - } - - cell = Fcdr (cell); - ret = Fcar (cell); - if (EQ (ret, Qd)) - format [idx++] = 'd'; - else if (EQ (ret, Qx)) - format [idx++] = 'x'; - else if (EQ (ret, QX)) - format [idx++] = 'X'; - else - continue; - format [idx++] = 0; - - sprintf (buf, format, code_point); - Dynarr_add (dst, '&'); - Dynarr_add_many (dst, buf, strlen (buf)); - Dynarr_add (dst, ';'); - return; - } - } - rest = Fcdr (rest); - } - sprintf (buf, "&MCS-%08X;", ch); + char_encode_as_entity_reference (ch, buf); Dynarr_add_many (dst, buf, strlen (buf)); return; }